Skip to content

Commit

Permalink
Add language
Browse files Browse the repository at this point in the history
  • Loading branch information
jayvarner committed Jan 8, 2025
1 parent c398245 commit 6b2e9de
Show file tree
Hide file tree
Showing 13 changed files with 153 additions and 29 deletions.
39 changes: 21 additions & 18 deletions readux_ingest_ecds/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,25 @@
from django.apps import apps
from django.core.exceptions import AppRegistryNotReady


def get_iiif_models():
    """Return the configured IIIF models keyed by short name.

    Every entry comes from a ``settings.IIIF_*_MODEL`` value. The values are
    resolved with ``apps.get_model``; if that raises ``AppRegistryNotReady``
    the unresolved settings values are returned instead.

    Returns:
        dict: Keys ``Manifest``, ``ImageServer``, ``RelatedLink``, ``Canvas``,
        ``Collection``, ``OCR`` and ``Language``. Values are whatever
        ``apps.get_model`` returns, or the raw settings values when the app
        registry is not ready.
    """
    # Single source of truth so the resolved and unresolved mappings can
    # never drift apart (e.g. one of them missing "Language").
    model_settings = {
        "Manifest": settings.IIIF_MANIFEST_MODEL,
        "ImageServer": settings.IIIF_IMAGE_SERVER_MODEL,
        "RelatedLink": settings.IIIF_RELATED_LINK_MODEL,
        "Canvas": settings.IIIF_CANVAS_MODEL,
        "Collection": settings.IIIF_COLLECTION_MODEL,
        "OCR": settings.IIIF_OCR_MODEL,
        "Language": settings.IIIF_LANGUAGE_MODEL,
    }
    try:
        return {
            name: apps.get_model(model_path)
            for name, model_path in model_settings.items()
        }
    except AppRegistryNotReady:
        return model_settings
12 changes: 9 additions & 3 deletions readux_ingest_ecds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
upload_trigger_file,
s3_copy,
)
from .services.iiif_services import create_manifest, create_manifest_from_pid
from .services.iiif_services import (
create_manifest,
create_manifest_from_pid,
find_language,
)
from .services.metadata_services import metadata_from_file, clean_metadata
from .helpers import get_iiif_models
from .storages import TmpStorage
Expand Down Expand Up @@ -459,7 +463,10 @@ def ingest(self):
manifest = create_manifest_from_pid(pid, self.image_server)
metadata = dict(row)
for key, value in metadata.items():
setattr(manifest, key, value)
if key == "language":
manifest.languages.add(find_language(value))
else:
setattr(manifest, key, value)

manifest.collections.set(self.collections.all())
manifest.save()
Expand All @@ -468,7 +475,6 @@ def ingest(self):
)

if created:

trigger_file = os.path.join(
settings.INGEST_TMP_DIR, str(local_ingest.id), f"{pid}.txt"
)
Expand Down
21 changes: 21 additions & 0 deletions readux_ingest_ecds/services/iiif_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,23 @@
OCR = get_iiif_models()["OCR"]


def set_default_language():
    """Return the default (English) language, creating it when missing.

    The lookup is made on ``code`` alone and ``name`` is supplied only through
    ``defaults``. Matching on both fields would miss an existing ``en`` row
    stored under a different name and then try to insert a second ``en`` row
    (``code`` is declared unique on the bundled test model).

    Returns:
        The ``Language`` object whose ``code`` is ``"en"``.
    """
    Language = get_iiif_models()["Language"]
    english, _ = Language.objects.get_or_create(
        code="en", defaults={"name": "English"}
    )
    return english


def find_language(language):
    """Look up the Language object for a language code.

    Args:
        language (str): Language code, matched against the ``code`` field.

    Returns:
        The result of ``Language.objects.get(code=language)``.
    """
    # NOTE(review): a code with no matching row is not handled here;
    # presumably the manager's lookup error propagates to the caller - confirm
    # that ingest callers expect that.
    return get_iiif_models()["Language"].objects.get(code=language)


def create_manifest(ingest):
"""
Create or update a Manifest from supplied metadata and images.
Expand All @@ -35,6 +52,8 @@ def create_manifest(ingest):
if key == "related":
# add RelatedLinks from metadata spreadsheet key "related"
create_related_links(manifest, value)
elif key == "language":
manifest.languages.add(find_language(value))
else:
# all other keys should exist as fields on Manifest (for now)
setattr(manifest, key, value)
Expand All @@ -46,6 +65,7 @@ def create_manifest(ingest):

# Ensure that manifest has an ID before updating the M2M relationship
manifest.save()
manifest.languages.add(set_default_language())
manifest.refresh_from_db()
manifest.collections.set(ingest.collections.all())
# Save again once relationship is set
Expand All @@ -64,4 +84,5 @@ def create_manifest_from_pid(pid, image_server):
"""
Manifest = get_iiif_models()["Manifest"]
manifest, _ = Manifest.objects.get_or_create(pid=pid, image_server=image_server)
manifest.languages.add(set_default_language())
return manifest
1 change: 1 addition & 0 deletions readux_ingest_ecds/services/metadata_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def clean_metadata(metadata):
*(f.name for f in get_iiif_models()["Manifest"]._meta.get_fields()),
"related",
"filename",
"language",
]

metadata = {
Expand Down
2 changes: 2 additions & 0 deletions test_app/fixtures/metadata_with_language.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
PID,Label,Summary,Author,Published city,Published date,Publisher,Language
a96264fa-e8d3-524b-ab56-cfd3b2c71073,Test Bundle,Test file,Test author,Test City,2021,Publisher test,mus
9 changes: 9 additions & 0 deletions test_app/iiif/migrations/0001_initial.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ class Migration(migrations.Migration):
migrations.CreateModel(
name="Canvas",
fields=[
(
"id",
models.BigAutoField(
auto_created=True,
primary_key=True,
serialize=False,
verbose_name="ID",
),
),
(
"pid",
models.CharField(
Expand Down
43 changes: 43 additions & 0 deletions test_app/iiif/migrations/0006_auto_20250108_1308.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Generated by Django 3.2.23 on 2025-01-08 13:08

from django.db import migrations, models
import django.utils.timezone
import uuid


class Migration(migrations.Migration):
    """Add the ``Language`` model and link it to ``Manifest``.

    Creates ``Language`` (auto ``id``, unique ``code``, ``name``; ordered by
    ``name``) and then adds the optional ``languages`` many-to-many field to
    ``manifest``.
    """

    dependencies = [
        ("iiif", "0005_canvas_resource"),
    ]

    operations = [
        # The model is created first so the many-to-many below can target it.
        migrations.CreateModel(
            name="Language",
            fields=[
                (
                    "id",
                    models.BigAutoField(
                        auto_created=True,
                        primary_key=True,
                        serialize=False,
                        verbose_name="ID",
                    ),
                ),
                ("code", models.CharField(max_length=16, unique=True)),
                ("name", models.CharField(max_length=255)),
            ],
            options={
                "ordering": ["name"],
            },
        ),
        migrations.AddField(
            model_name="manifest",
            name="languages",
            field=models.ManyToManyField(
                blank=True,
                help_text="Languages present in the manifest.",
                to="iiif.Language",
            ),
        ),
    ]
17 changes: 17 additions & 0 deletions test_app/iiif/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,20 @@
from django.contrib.auth.models import AbstractUser


class Language(models.Model):
    """A language stored as a code plus a display name."""

    # Language code; declared unique.
    code = models.CharField(max_length=16, unique=True)
    # Display name; this is what ``__str__`` returns and the default ordering.
    name = models.CharField(max_length=255)

    class Meta:
        ordering = ["name"]

    def __str__(self):
        """Return the language's name as a string."""
        return str(self.name)


class Collection(models.Model):
pid = models.UUIDField(primary_key=True, default=uuid4, editable=True)

Expand All @@ -25,6 +39,9 @@ class Manifest(models.Model):
published_city = models.TextField(null=True, blank=True)
publisher = models.TextField(null=True, blank=True)
metadata = models.JSONField(default=dict, blank=True)
languages = models.ManyToManyField(
Language, help_text="Languages present in the manifest.", blank=True
)

@property
def related_links(self):
Expand Down
1 change: 1 addition & 0 deletions test_app/test_app/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
# Model references read by readux_ingest_ecds.helpers.get_iiif_models, which
# passes each value to apps.get_model.
IIIF_CANVAS_MODEL = "iiif.Canvas"
IIIF_COLLECTION_MODEL = "iiif.Collection"
IIIF_OCR_MODEL = "iiif.OCR"
IIIF_LANGUAGE_MODEL = "iiif.Language"
# Relative working directories for ingests. INGEST_TMP_DIR is where the ingest
# code builds its per-ingest trigger file paths; the other two are presumably
# used for processing and OCR output - confirm against the ingest services.
INGEST_TMP_DIR = os.path.join("tmp")
INGEST_PROCESSING_DIR = os.path.join("tmp", "processing")
INGEST_OCR_DIR = os.path.join("tmp", "ocr")
Expand Down
9 changes: 8 additions & 1 deletion test_app/tests/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from factory import Faker, SubFactory
from django.conf import settings
from readux_ingest_ecds.models import Local, Bulk, S3Ingest
from iiif.models import ImageServer, Manifest, User, Collection, Canvas
from iiif.models import ImageServer, Manifest, User, Collection, Canvas, Language


class UserFactory(DjangoModelFactory):
Expand Down Expand Up @@ -84,3 +84,10 @@ class CollectionFactory(DjangoModelFactory):

class Meta:
model = Collection


class LanguageFactory(DjangoModelFactory):
    """Factory that builds ``Language`` objects for tests.

    No field declarations are made here, so callers pass the values they
    need, e.g. ``LanguageFactory.create(name="Creek", code="mus")``.
    """

    class Meta:
        model = Language
26 changes: 20 additions & 6 deletions test_app/tests/test_iiif_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,32 @@
from django.conf import settings
from readux_ingest_ecds.services import iiif_services
from readux_ingest_ecds.services.metadata_services import metadata_from_file
from .factories import LocalFactory
from .factories import LocalFactory, LanguageFactory


class IIIFServicesTest(TestCase):
    """Tests for ``iiif_services.create_manifest``."""

    def setUp(self):
        """Set instance variables."""
        super().setUp()
        self.fixture_path = settings.FIXTURE_DIR

    def test_creating_manifest(self):
        """It should create a manifest with the ingest's metadata."""
        extra_metadata = metadata_from_file(
            os.path.join(self.fixture_path, "extra_metadata.csv")
        )[0]
        local = LocalFactory.create(metadata=extra_metadata)
        manifest = iiif_services.create_manifest(local)
        assert extra_metadata["pid"] == manifest.pid
        assert "ssdl:spatialCoverageFastUri" in [d["label"] for d in manifest.metadata]

    def test_creating_manifest_with_language(self):
        """It should attach the language named in the metadata to the manifest."""
        metadata = metadata_from_file(
            os.path.join(self.fixture_path, "metadata_with_language.csv")
        )[0]
        # The fixture row carries the code "mus", so a matching Language must
        # exist before the manifest is created.
        language = LanguageFactory.create(name="Creek", code="mus")
        local = LocalFactory.create(metadata=metadata)
        manifest = iiif_services.create_manifest(local)
        assert language in manifest.languages.all()
1 change: 1 addition & 0 deletions test_app/tests/test_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def test_creating_manifest(self):
local = self.mock_local("csv_meta.zip")
local.manifest = create_manifest(local)
assert local.manifest.pid == "sqn75"
assert local.manifest.languages.all()[0].code == "en"

def test_metadata_from_excel(self):
"""It should create a manifest with metadata supplied in an Excel file."""
Expand Down
1 change: 0 additions & 1 deletion test_app/tests/test_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def test_prevent_double_ocr(self):

ocr = ocr_services.get_ocr(canvas)
annos = ocr_services.add_ocr_annotations(canvas, ocr)
print(len(annos))
OCR.objects.bulk_create(annos)
assert len(annos) == 178
assert OCR.objects.count() == 178
Expand Down

0 comments on commit 6b2e9de

Please sign in to comment.