Skip to content

Commit

Permalink
fix: Stop creating collection_file rows for file errors, #366
Browse files Browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Nov 2, 2024
1 parent 5c1774b commit b5f7bda
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 29 deletions.
4 changes: 2 additions & 2 deletions docs/database.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ Database tables

..
To update the diagram, see https://ocp-software-handbook.readthedocs.io/en/latest/services/postgresql.html#generate-entity-relationship-diagram
java -jar schemaspy.jar -t pgsql -dp postgresql.jar -host localhost -db kingfisher_process -o schemaspy -norows -I '(django|auth).*'
java -jar schemaspy.jar -t pgsql -dp postgresql.jar -o schemaspy -norows -I '(django|auth).*' -host localhost -db kingfisher_process -u MYUSER
.. list-table::
:header-rows: 1
Expand Down Expand Up @@ -81,7 +81,7 @@ collection table

Use this column to find the crawl log.
* - ``expected_files_count``
- The number of messages to expect from Kingfisher Collect.
- The number of non-error messages to expect from Kingfisher Collect.
* - ``store_start_at``
- The time at which the collection was added.
* - ``store_end_at``
Expand Down
31 changes: 14 additions & 17 deletions process/management/commands/api_loader.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import json
import os.path

from django.conf import settings
Expand All @@ -6,9 +7,9 @@
from django.utils.translation import gettext as t
from yapw.methods import ack, publish

from process.models import Collection
from process.models import Collection, CollectionNote
from process.processors.loader import create_collection_file
from process.util import consume, decorator
from process.util import consume, create_note, decorator
from process.util import wrap as w

# Other applications use this routing key.
Expand All @@ -27,27 +28,23 @@ def handle(self, *args, **options):

def callback(client_state, channel, method, properties, input_message):
collection_id = input_message["collection_id"]
collection_file_url = input_message["url"]
url = input_message["url"]
path = input_message.get("path")
errors = input_message.get("errors")

collection = Collection.objects.get(pk=collection_id)
if collection.deleted_at:
ack(client_state, channel, method.delivery_tag)
return

# In Kingfisher Collect, `path` is set if and only if `errors` isn't set.
if "path" in input_message:
filename = os.path.join(settings.KINGFISHER_COLLECT_FILES_STORE, input_message["path"])
else:
filename = collection_file_url

with transaction.atomic():
collection_file = create_collection_file(
collection, filename=filename, url=collection_file_url, errors=input_message.get("errors")
)

# FileError items from Kingfisher Collect are not processed further.
if "errors" not in input_message:
message = {"collection_id": collection_id, "collection_file_id": collection_file.pk}
publish(client_state, channel, message, routing_key)
if errors:
create_note(collection, CollectionNote.Level.ERROR, f"Couldn't download {url}", data=json.loads(errors))
# FileError items from Kingfisher Collect are not processed further.
else:
filename = os.path.join(settings.KINGFISHER_COLLECT_FILES_STORE, path)
collection_file = create_collection_file(collection, filename=filename, url=url)
message = {"collection_id": collection_id, "collection_file_id": collection_file.pk}
publish(client_state, channel, message, routing_key)

ack(client_state, channel, method.delivery_tag)
13 changes: 3 additions & 10 deletions process/processors/loader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import argparse
import copy
import json
import logging
import os

Expand All @@ -9,7 +8,7 @@
from process.exceptions import InvalidFormError
from process.forms import CollectionFileForm, CollectionForm, CollectionNote, CollectionNoteForm
from process.models import Collection, CollectionFile, ProcessingStep
from process.util import create_note, create_step
from process.util import create_step

logger = logging.getLogger(__name__)

Expand All @@ -21,26 +20,20 @@ def file_or_directory(path):
return path


def create_collection_file(collection, filename=None, url=None, errors=None) -> CollectionFile:
def create_collection_file(collection, filename=None, url=None) -> CollectionFile:
"""
Create file for a collection and steps for this file.
:param Collection collection: collection
:param str filename: path to file data
:param json errors: errors to be stored
:returns: created collection file
:raises InvalidFormError: if there is a validation error
"""
form = CollectionFileForm({"collection": collection, "filename": filename, "url": url})

if form.is_valid():
collection_file = form.save()
if not errors:
create_step(ProcessingStep.Name.LOAD, collection.pk, collection_file=collection_file)
else:
note = f"Couldn't download {collection_file}" # path is set to url in api_loader
create_note(collection, CollectionNote.Level.ERROR, note, data=json.loads(errors))

create_step(ProcessingStep.Name.LOAD, collection.pk, collection_file=collection_file)
return collection_file

raise InvalidFormError(form.error_messages)
Expand Down

0 comments on commit b5f7bda

Please sign in to comment.