Skip to content

Commit

Permalink
Call save as late as possible
Browse files Browse the repository at this point in the history
  • Loading branch information
jayvarner committed Apr 23, 2024
1 parent 931d04a commit 37f0b69
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 12 deletions.
7 changes: 5 additions & 2 deletions readux_ingest_ecds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def open_metadata(self):

def create_canvases(self):
Canvas = get_iiif_models()['Canvas']
new_canvases = []
images = None
with open(self.trigger_file, 'r') as t_file:
images = t_file.read().splitlines()
Expand All @@ -195,14 +196,16 @@ def create_canvases(self):
except IndexError:
ocr_file_path = None

Canvas.objects.get_or_create(
new_canvases.append(Canvas(
manifest=self.manifest,
pid=canvas_pid,
ocr_file_path=ocr_file_path,
position=position,
width=width,
height=height
)
))

Canvas.objects.bulk_create(new_canvases)

upload_trigger_file(self.trigger_file)

Expand Down
16 changes: 6 additions & 10 deletions readux_ingest_ecds/services/ocr_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,10 +403,7 @@ def add_ocr_annotations(canvas, ocr):
annotations.append(anno)
word_order += 1

# bulk_create does not call the model's save method. Saving the OCR annotation
# at the same time as creating it is very slow for unknown reasons. Once this
# method finishes, the next method that is called will save all the new OCR annotations.
OCR.objects.bulk_create(annotations)
return annotations

def add_oa_annotations(annotation_list_url):
data = fetch_url(annotation_list_url)
Expand Down Expand Up @@ -487,12 +484,11 @@ def is_tsv(to_test):

def add_ocr_to_canvases(manifest):
OCR = get_iiif_models()['OCR']
new_ocr_annotations = []
for canvas in manifest.canvas_set.all():
ocr = get_ocr(canvas)
if ocr is not None:
add_ocr_annotations(canvas, ocr)
# The add_ocr_annotations method uses bulk_create() which does not call save() on the model.
# Calling save() is really slow and I don't know why. Once the annotation
# has been created, calling save() is as fast as expected.
[ocr.save() for ocr in OCR.objects.filter(canvas=canvas)]
canvas.save() # trigger reindex
new_ocr_annotations += add_ocr_annotations(canvas, ocr)

OCR.objects.bulk_create(new_ocr_annotations)
list(map(lambda canvas: canvas.save(), manifest.canvas_set.all()))

0 comments on commit 37f0b69

Please sign in to comment.