Skip to content

Commit

Permalink
Save all the annotations after creating them
Browse files Browse the repository at this point in the history
  • Loading branch information
jayvarner committed Jan 29, 2024
1 parent c5961ee commit c9cbb71
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 4 deletions.
7 changes: 3 additions & 4 deletions readux_ingest_ecds/services/ocr_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,6 @@ def add_ocr_annotations(canvas, ocr):
word_order = 1
annotations = []
for word in ocr:
print(f'adding word {word}')
# A quick check to make sure the header row didn't slip through.
if word['x'] == 'x':
continue
Expand All @@ -391,7 +390,6 @@ def add_ocr_annotations(canvas, ocr):
word['content'].isspace()
):
word['content'] = ' '
print(f'creating anno for {word}')
anno = OCR()
anno.canvas = canvas
anno.x = word['x']
Expand All @@ -401,11 +399,12 @@ def add_ocr_annotations(canvas, ocr):
anno.resource_type = anno.OCR
anno.content = word['content']
anno.order = word_order
print(f'pushing {word}')
annotations.append(anno)
word_order += 1

print('saving')
# bulk_create does not call the model's save method. Saving the OCR annotation
# at the same time as creating it is very slow for unknown reasons. Once this
# method finishes, the next method that is called will save all the new OCR annotations.
OCR.objects.bulk_create(annotations)

def add_oa_annotations(annotation_list_url):
Expand Down
4 changes: 4 additions & 0 deletions readux_ingest_ecds/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,8 @@ def add_ocr_task(manifest_id, *args, **kwargs):
ocr = get_ocr(canvas)
if ocr is not None:
add_ocr_annotations(canvas, ocr)
# The add_ocr_annotations method uses bulk_create() which does not call save() on the model.
# Calling save() at creation time is really slow and I don't know why. Calling save() after
# the annotation has been created is as fast as expected.
[ocr.save() for ocr in canvas.annotation_set.all()]
canvas.save() # trigger reindex

0 comments on commit c9cbb71

Please sign in to comment.