Skip to content

Commit

Permalink
Merge pull request #118 from BritishGeologicalSurvey/ags-export
Browse files Browse the repository at this point in the history
Export of boreholes by IDs and polygon
  • Loading branch information
volcan01010 authored Jul 12, 2023
2 parents fbdafff + e21f605 commit 74fe9f7
Show file tree
Hide file tree
Showing 7 changed files with 617 additions and 72 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
__pycache__
venv
.venv
.idea
.coverage
htmlcov
1 change: 1 addition & 0 deletions app/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

# Define error responses
error_responses = {
status.HTTP_400_BAD_REQUEST: {"model": ErrorResponse},
status.HTTP_404_NOT_FOUND: {"model": ErrorResponse},
status.HTTP_422_UNPROCESSABLE_ENTITY: {"model": ErrorResponse},
status.HTTP_500_INTERNAL_SERVER_ERROR: {"model": ErrorResponse}
Expand Down
253 changes: 237 additions & 16 deletions app/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,19 @@
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.exceptions import HTTPException

import shapely

from requests.exceptions import Timeout, ConnectionError, HTTPError

from app import conversion, validation
from app.checkers import check_ags, check_bgs
from app.errors import error_responses, InvalidPayloadError
from app.schemas import ValidationResponse
from app.schemas import ValidationResponse, BoreholeCountResponse

# Upstream URL templates. The `{...}` placeholders are filled in with
# str.format() by the route handlers below.
# Formatted by get_ags_log with a single borehole ID.
BOREHOLE_VIEWER_URL = "https://gwbv.bgs.ac.uk/GWBV/viewborehole?loca_id={bgs_loca_id}"
# Formatted by ags_export with one ID or several IDs joined by semicolons.
BOREHOLE_EXPORT_URL = "https://gwbv.bgs.ac.uk/ags_export?loca_ids={bgs_loca_id}"
# Formatted by ags_export_by_polygon with a WKT polygon. The query asks for
# JSON output, only the bgs_loca_id property, features intersecting the
# polygon, and at most 10 items per response (`limit=10`).
BOREHOLE_INDEX_URL = ("https://ogcapi.bgs.ac.uk/collections/agsboreholeindex/items?f=json"
"&properties=bgs_loca_id&filter=INTERSECTS(shape,{polygon})&limit=10")

router = APIRouter()

Expand All @@ -36,6 +41,12 @@
"content": {"application/pdf": {}},
"description": "Return a graphical log of AGS data in .PDF format"}

# Response documentation for the AGS export routes: the shared error
# responses plus a '200' entry that may be either a zip archive or JSON.
ags_export_responses = {
    **error_responses,
    '200': {
        "content": {"application/x-zip-compressed": {}, "application/json": {}},
        "description": ("Return a zip containing .ags file and metadata .txt file "
                        "or a json response containing the borehole ID count"),
    },
}


# Enum for search logic
class Format(StrEnum):
Expand Down Expand Up @@ -113,6 +124,26 @@ class ResponseType(StrEnum):
example="20190430093402523419",
)

# Query parameter used as the default of `bgs_loca_id` in ags_export.
# The leading `...` marks it as required; the value is one borehole ID or
# several IDs separated by semicolons, passed as a single string.
ags_export_query = Query(
...,
title="BGS LOCA ID",
description="A single ID or multiple IDs separated by semicolons",
example="20190430093402523419",
)

# Query parameter used as the default of `polygon` in ags_export_by_polygon.
# Required (`...`); expected to hold a polygon in Well Known Text.
polygon_query = Query(
...,
title="POLYGON",
description="A polygon expressed in Well Known Text",
example="POLYGON((-4.5 56,-4 56,-4 55.5,-4.5 55.5,-4.5 56))",
)

# Query parameter used as the default of `count_only` in
# ags_export_by_polygon. Optional; defaults to False.
count_only_query = Query(
default=False,
title='Return count only',
description='Return count of found boreholes only',
)

response_type_query = Query(
default=ResponseType.inline,
title='PDF Response Type',
Expand All @@ -133,6 +164,27 @@ async def validate(background_tasks: BackgroundTasks,
checkers: List[Checker] = validate_form,
fmt: Format = format_form,
request: Request = None):
"""
Validate an AGS4 file to the AGS File Format v4.x rules and the NGDC data submission requirements.
Uses the Official AGS4 Python Library.
:param background_tasks: Background tasks for deleting temporary directories.
:type background_tasks: BackgroundTasks
:param files: List of AGS4 files to be validated.
:type files: List[UploadFile]
:param std_dictionary: The standard dictionary to use for validation. Options are "BGS" or "AGS".
:type std_dictionary: Dictionary
:param checkers: List of validation rules to be used during validation.
:type checkers: List[Checker]
:param fmt: The format to return the validation results in. Options are "text" or "json".
:type fmt: Format
:param request: The request object.
:type request: Request
:return: A response with the validation results in either plain text or JSON format.
:rtype: Union[FileResponse, ValidationResponse]
:raises InvalidPayloadError: If the payload is missing files or checkers.
"""

if not files[0].filename or not checkers:
raise InvalidPayloadError(request)

Expand Down Expand Up @@ -168,6 +220,17 @@ async def validate(background_tasks: BackgroundTasks,
return response


def prepare_validation_response(request, data):
    """
    Wrap validation results in a ValidationResponse schema object.

    :param request: The incoming request; its URL is reported in the 'self' field.
    :param data: The validation results; its length is reported in the message.
    :return: A ValidationResponse with a success message, the request URL and the data.
    """
    summary = f'{len(data)} files validated'
    request_url = str(request.url)
    return ValidationResponse(
        **{'msg': summary, 'type': 'success', 'self': request_url, 'data': data},
        media_type="application/json",
    )


@router.post("/convert/",
tags=["convert"],
response_class=StreamingResponse,
Expand All @@ -179,6 +242,22 @@ async def convert(background_tasks: BackgroundTasks,
files: List[UploadFile] = conversion_file,
sort_tables: bool = sort_tables_form,
request: Request = None):
"""
Convert files between .ags and .xlsx format. Option to sort worksheets in .xlsx file in alphabetical order.
:param background_tasks: A background task that manages file conversion asynchronously.
:type background_tasks: BackgroundTasks
:param files: A list of files to be converted. Must be in .ags or .xlsx format.
:type files: List[UploadFile]
:param sort_tables: A boolean indicating whether to sort worksheets in the .xlsx file in alphabetical order.
:type sort_tables: bool
:param request: The HTTP request object.
:type request: Request
:return: A streaming response containing a .zip file with the converted files and a log file.
:rtype: StreamingResponse
:raises InvalidPayloadError: If the request payload is invalid.
:raises Exception: If the conversion fails or an unexpected error occurs.
"""

if not files[0].filename:
raise InvalidPayloadError(request)
RESULTS = 'results'
Expand Down Expand Up @@ -208,26 +287,30 @@ async def convert(background_tasks: BackgroundTasks,
return response


def prepare_validation_response(request, data):
    """
    Wrap validation results in a ValidationResponse schema object.

    :param request: The incoming request; its URL is reported in the 'self' field.
    :param data: The validation results; its length is reported in the message.
    :return: A ValidationResponse with a success message, the request URL and the data.
    """
    summary = f'{len(data)} files validated'
    request_url = str(request.url)
    return ValidationResponse(
        **{'msg': summary, 'type': 'success', 'self': request_url, 'data': data},
        media_type="application/json",
    )


@router.get("/ags_log/",
# tags=["ags_log"],
# summary="Generate Graphical Log",
# description="Generate a graphical log (.pdf) from AGS data held by the National Geoscience Data Centre.",
tags=["ags_log"],
summary="Generate Graphical Log",
description=("Generate a graphical log (.pdf) from AGS data "
"held by the National Geoscience Data Centre."),
include_in_schema=False,
response_class=Response,
responses=pdf_responses)
def get_ags_log(bgs_loca_id: int = ags_log_query,
def get_ags_log(bgs_loca_id: str = ags_log_query,
response_type: ResponseType = response_type_query):
"""
Get a graphical log (.pdf) for a single borehole in AGS format from the National Geoscience Data Centre.
:param bgs_loca_id: The unique identifier of the borehole to generate the log for.
:type bgs_loca_id: str
:param response_type: The type of response to return (e.g. 'attachment' to force download or 'inline' \
to display in browser).
:type response_type: ResponseType, optional
:return: A response containing a .pdf file with the generated borehole log.
:rtype: Response
:raises HTTPException 404: If the specified borehole does not exist or is confidential.
:raises HTTPException 500: If the borehole generator returns an error.
:raises HTTPException 500: If the borehole generator could not be reached.
"""

url = BOREHOLE_VIEWER_URL.format(bgs_loca_id=bgs_loca_id)

try:
Expand All @@ -251,3 +334,141 @@ def get_ags_log(bgs_loca_id: int = ags_log_query,
headers = {'Content-Disposition': f'{response_type.value}; filename="{filename}"'}

return Response(response.content, headers=headers, media_type='application/pdf')


@router.get("/ags_export/",
            tags=["ags_export"],
            summary="Export one or more boreholes in .ags format",
            description=("Export one or more borehole in .ags format from AGS data "
                         "held by the National Geoscience Data Centre."),
            include_in_schema=False,
            response_class=Response,
            responses=ags_export_responses)
def ags_export(bgs_loca_id: str = ags_export_query):
    """
    Export one or more boreholes in .ags format from AGS data held by the National Geoscience Data Centre.
    :param bgs_loca_id: A single borehole ID, or up to 10 IDs separated by semicolons.
    :type bgs_loca_id: str
    :return: A response containing a .zip file with the exported borehole data.
    :rtype: Response
    :raises HTTPException 404: If the specified boreholes do not exist or are confidential.
    :raises HTTPException 422: If more than 10 borehole IDs are supplied.
    :raises HTTPException 500: If the borehole exporter returns an error.
    :raises HTTPException 500: If the borehole exporter could not be reached.
    """
    # Imported locally so this handler does not depend on the module's import block.
    from urllib.parse import quote

    if len(bgs_loca_id.split(';')) > 10:
        raise HTTPException(status_code=422, detail="More than 10 borehole IDs.")

    # The IDs come straight from the caller. Percent-encode them so characters
    # such as '&', '#' or '=' cannot add or alter query parameters of the
    # upstream request. ';' is kept because it is the ID separator.
    url = BOREHOLE_EXPORT_URL.format(bgs_loca_id=quote(bgs_loca_id, safe=';'))

    try:
        response = requests.get(url, timeout=10)
    except (Timeout, ConnectionError) as exc:
        raise HTTPException(status_code=500,
                            detail="The borehole exporter could not be reached. "
                                   "Please try again later.") from exc

    try:
        response.raise_for_status()
    except HTTPError as exc:
        if response.status_code == 404:
            raise HTTPException(status_code=404,
                                detail=f"Failed to retrieve borehole {bgs_loca_id}. "
                                       "It may not exist or may be confidential") from exc
        raise HTTPException(status_code=500,
                            detail="The borehole exporter returned an error.") from exc

    headers = {'Content-Disposition': 'attachment; filename="boreholes.zip"'}

    return Response(response.content, headers=headers, media_type='application/x-zip-compressed')


@router.get("/ags_export_by_polygon/",
            tags=["ags_export_by_polygon"],
            summary="Export a number of boreholes in .ags format",
            description=("Export a number of boreholes in .ags format from AGS data "
                         "held by the National Geoscience Data Centre."),
            include_in_schema=False,
            response_model=BoreholeCountResponse,
            responses=ags_export_responses)
def ags_export_by_polygon(polygon: str = polygon_query,
                          count_only: bool = count_only_query,
                          request: Request = None):
    """
    Export the boreholes in .ags format from AGS data held by the National Geoscience Data Centre,
    that are bounded by the polygon. If there are more than 10 boreholes return an error.
    :param polygon: A polygon in Well Known Text.
    :type polygon: str
    :param count_only: If true, return only the number of boreholes matched by the polygon.
    :type count_only: bool
    :param request: The request object.
    :type request: Request
    :return: A response containing a count or a .zip file with the exported borehole data.
    :rtype: Union[BoreholeCountResponse, Response]
    :raises HTTPException 400: If there are no boreholes or more than 10 boreholes in the polygon.
    :raises HTTPException 404: If the borehole index or the borehole exporter responds with 404.
    :raises HTTPException 422: If the Well Known Text is not a POLYGON or is invalid.
    :raises HTTPException 500: If the borehole index could not be reached.
    :raises HTTPException 500: If the borehole index returns an error.
    :raises HTTPException 500: If the borehole exporter could not be reached.
    :raises HTTPException 500: If the borehole exporter returns an error.
    """

    # Check explicitly that the WKT is a valid POLYGON
    # The BOREHOLE_INDEX_URL API does not return an error for some bad WKT
    try:
        geometry = shapely.wkt.loads(polygon)
    except shapely.errors.GEOSException as exc:
        raise HTTPException(status_code=422,
                            detail="Invalid polygon") from exc

    # Parsing alone also accepts other geometry types (POINT, LINESTRING, ...),
    # so the geometry type has to be checked separately.
    if geometry.geom_type != 'Polygon':
        raise HTTPException(status_code=422,
                            detail="Invalid polygon")

    url = BOREHOLE_INDEX_URL.format(polygon=polygon)

    try:
        response = requests.get(url, timeout=10)
    except (Timeout, ConnectionError) as exc:
        raise HTTPException(status_code=500,
                            detail="The borehole index could not be reached. "
                                   "Please try again later.") from exc

    try:
        response.raise_for_status()
    except HTTPError as exc:
        if response.status_code == 404:
            raise HTTPException(status_code=404,
                                detail="Failed to retrieve boreholes for the given polygon") from exc
        raise HTTPException(status_code=500,
                            detail="The borehole index returned an error.") from exc

    collection = response.json()
    # numberMatched is read as the total hit count; the index request itself
    # asks for at most 10 features (limit=10 in BOREHOLE_INDEX_URL).
    count = collection['numberMatched']

    if count_only:
        return prepare_count_response(request, count)

    if count == 0:
        raise HTTPException(status_code=400,
                            detail="No boreholes found in the given polygon")
    if count > 10:
        raise HTTPException(status_code=400,
                            detail=f"More than 10 boreholes ({count}) "
                                   "found in the given polygon. Please try with a smaller polygon")

    # Delegate the download to the ID-based export handler.
    bgs_loca_ids = ';'.join(feature['id'] for feature in collection['features'])
    return ags_export(bgs_loca_ids)


def prepare_count_response(request, count):
    """
    Wrap a borehole count in a BoreholeCountResponse schema object.

    :param request: The incoming request; its URL is reported in the 'self' field.
    :param count: The number of boreholes to report.
    :return: A BoreholeCountResponse with a success message, the request URL and the count.
    """
    request_url = str(request.url)
    return BoreholeCountResponse(
        **{'msg': 'Borehole count', 'type': 'success', 'self': request_url, 'count': count},
        media_type="application/json",
    )
4 changes: 4 additions & 0 deletions app/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,7 @@ class ErrorResponse(MinimalResponse):

# Response schema for validation results: extends MinimalResponse with a
# `data` list whose items are Validation objects or booleans.
# `data` defaults to None when not supplied.
class ValidationResponse(MinimalResponse):
data: List[Union[Validation, bool]] = None


# Response schema for a borehole count: extends MinimalResponse with a
# required integer `count` (the `...` default marks the field as required;
# 4 is only the documented example value).
class BoreholeCountResponse(MinimalResponse):
count: int = Field(..., example=4)
4 changes: 2 additions & 2 deletions app/static/js/map.js
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ var agsboreholes = L.featureGroup
"<b>Project Engineer: </b>" + properties.proj_eng + "<br>" +
"<b>Project Contractor: </b>" + properties.proj_cont + "<br>" +
"<b>Original LOCA ID: </b>" + properties.loca_id + "<br>" +
"<b>AGS Graphical Log: </b>" + "<a href=" + "https://agsapi.bgs.ac.uk/ags_log/?bgs_loca_id=" + properties.bgs_loca_id + " target=" + "_blank" + ">View</a> / " +"<a href=" + "https://agsapi.bgs.ac.uk/ags_log/?bgs_loca_id=" + properties.bgs_loca_id + "&response_type=attachment" + ">Download</a>" + "<br>" +
// "<b>AGS Data: </b>" + "<a href=" + "https://agsapi.bgs.ac.uk/ags_export/?bgs_loca_id=" + properties.bgs_loca_id + " target=" + "_blank" + ">Download</a>" + "<br>" +
"<b>AGS Graphical Log: </b>" + "<a href=" + "/ags_log/?bgs_loca_id=" + properties.bgs_loca_id + " target=" + "_blank" + ">View</a> / " +"<a href=" + "/ags_log/?bgs_loca_id=" + properties.bgs_loca_id + "&response_type=attachment" + ">Download</a>" + "<br>" +
"<b>AGS Data (NGDC Download Service): </b>" + "<a href=" + "/ags_export/?bgs_loca_id=" + properties.bgs_loca_id + " target=" + "_blank" + ">Download</a>" + "<br>" +
"<b>AGS Submission Record (raw data): </b>" + "<a href=" + properties.dad_item_url + " target=" + "_blank" + ">View</a>" + "<br>";
layer.bindPopup(popupContent);
},
Expand Down
4 changes: 2 additions & 2 deletions app/templates/landing_page.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ <h2>AGS4 Schema & Data Validation</h2>
<br>
<h3>AGS4 Validation </h3>
<p>Performs validation using the <a href="https://gitlab.com/ags-data-format-wg/ags-python-library">Official AGS Python Library</a> <strong>version 0.4.1</strong>, which implements checks of the rules as written in the <strong>AGS data format standard v4.x.</strong></p>
<p style="color:red">If your using AGS Data Format Standard v3.x use our legacy <a href="https://webapps.bgs.ac.uk/data/ags/validation/index.cfm">AGS Validator</a></p>
<p style="color:red">If you're using AGS Data Format Standard v3.x use our legacy <a href="https://webapps.bgs.ac.uk/data/ags/validation/index.cfm">AGS Validator</a></p>
<br>
<h3>BGS Data Validation</h3>
<p>Your files will be validated against the below rules as defined by BGS/NGDC.</p>
Expand Down Expand Up @@ -142,7 +142,7 @@ <h4>Future data validation rules: (Coming Soon)</h4>
<section id="ags_data">
<h2>AGS Data Discovery</h2>
<br>
<p>Use the map below to find AGS data, click on the markers to find borehole information, links to graphical logs and the original submitted data.</p>
<p>Use the map below to find AGS data, click on the markers to find borehole information, links to graphical logs, <a href="https://www.bgs.ac.uk/technologies/geotechnical-data-services/ags-download-service/">.ags data download from the NGDC AGS database,</a> and the original submitted AGS data.</p>
<p><b>The map will show a maximum of 100 AGS markers - users may need to pan/zoom to display markers of interest. </b></p>
<p><b>If no AGS Submission Record is shown at the link provided there is likely a legacy confidentiality restriction.</b></p>
<br>
Expand Down
Loading

0 comments on commit 74fe9f7

Please sign in to comment.