diff --git a/.github/workflows/lighthouse.yml b/.github/workflows/lighthouse.yml index e62915fba..9c0e14e62 100644 --- a/.github/workflows/lighthouse.yml +++ b/.github/workflows/lighthouse.yml @@ -19,7 +19,7 @@ jobs: ports: - 5432:5432 solr: - image: solr:8.6 + image: solr:9.2 ports: - 8983:8983 steps: diff --git a/.github/workflows/sphinx_docs.yml b/.github/workflows/sphinx_docs.yml index 418cd339c..a29708f74 100644 --- a/.github/workflows/sphinx_docs.yml +++ b/.github/workflows/sphinx_docs.yml @@ -49,7 +49,7 @@ jobs: # for pull requests, exit with error if documentation coverage is incomplete - name: Report on documentation coverage if: ${{ github.event_name == 'pull_request' }} - run: if [[ $((`wc -l < docs/_build/coverage/python.txt`)) -eq 2 ]] ; then echo "Documentation coverage complete"; else cat docs/_build/coverage/python.txt && exit 1; fi + run: if [[ $((`wc -l < docs/_build/coverage/python.txt`)) -eq 3 ]] ; then echo "Documentation coverage complete"; else cat docs/_build/coverage/python.txt && exit 1; fi # when building on push to main, publish the built docs - name: Deploy built docs to github pages diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 847b57ac1..d92b1ada1 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -30,7 +30,7 @@ jobs: ports: - 5432:5432 solr: - image: solr:8.6 + image: solr:9.2 ports: - 8983:8983 steps: @@ -43,7 +43,7 @@ jobs: docker cp solr_conf ${{ job.services.solr.id }}:/opt/solr/server/solr/configsets/geniza docker exec --user root ${{ job.services.solr.id }} /bin/bash -c "chown -R solr:solr /opt/solr/server/solr/configsets/geniza" - - name: Copy solr configsets to solr home directory (Solr 8 specific) + - name: Copy solr configsets to solr home directory run: "docker exec -d ${{ job.services.solr.id }} cp -r /opt/solr/server/solr/configsets /var/solr/data" # Python version to use is stored in the .python-version file, which is the diff --git a/.github/workflows/visual_tests.yml b/.github/workflows/visual_tests.yml index 5df186846..1fbe115af 100644 --- a/.github/workflows/visual_tests.yml +++ b/.github/workflows/visual_tests.yml @@ -53,7 +53,7 @@ jobs: ports: - 5432:5432 solr: - image: solr:8.6 + image: solr:9.2 ports: - 8983:8983 steps: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 3ed20db90..1af314737 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,22 +1,22 @@ repos: - repo: https://github.com/psf/black - rev: 22.3.0 # Replace by any tag/version: https://github.com/psf/black/tags + rev: 23.3.0 # Replace by any tag/version: https://github.com/psf/black/tags hooks: - id: black # Assumes that your shell's `python` command is linked to python3.6+ language_version: python - repo: https://github.com/pycqa/isort - rev: 5.9.3 + rev: 5.12.0 hooks: - id: isort args: ["--profile", "black", "--filter-files"] - repo: https://github.com/pre-commit/mirrors-prettier - rev: v2.4.1 + rev: v2.7.1 hooks: - id: prettier exclude: \.html$ # exclude django templates, which prettier does not support - repo: https://github.com/rtts/djhtml - rev: v1.4.9 + rev: 3.0.6 hooks: - id: djhtml diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 65e5d44e2..21a713232 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,44 @@ Change Log ========== +4.15 +---- + +- public site + - bugfix: On tag change, document indexing is one revision behind + - bugfix: Input date not always populating + - bugfix: Digital translation footnote in scholarship records behaving incorrectly, excluding other footnotes on source + +- image, transcription, translation viewer/editor + - As a front end desktop user, I would like to see a bigger version of the document image in order to read the document (especially when no transcription exists). + - As a public site viewer, I would like to see translation alongside the document image by default if both are present, so that I can read the document in my native language. + - As a content editor, I want the "pop out" button in the transcription editor up higher, so it's immediately accessible. + - As a content editor, I want the ability to add polygon annotation boxes using the transcription editor, so I can draw accurate bounding boxes around text. + - As a content editor, I want the location field for digital edition/translations to automatically populate from an existing edition/translation on the same source, so that I can save time manually re-entering it. + - bugfix: Editing/deleting parts of annotation box titles results in unexpected behavior (no change or deleting entire annotation box) + - bugfix: In Safari, ITT panel toggles leave trails + - bugfix: Annotations on the document detail page do not respect reordering + - bugfix: Transcription and translation may become misaligned when resizing window + - bugfix: Alignment between Arabic transcriptions and English translations is slightly off + +- admin + - As a content admin, I would like filters in the document admin to search by English and Hebrew language of translation, so that I can collect those documents for CSV export for use in teaching. + - As a content admin, I would like to include a rationale for the inferred date field from a list of options, so that I can enter data more efficiently and consistently. + - As a content admin, I want inferred date and accompanying notes in the csv exports of documents, so that I can keep track of this information in my own research. + - As a content editor, I want a "no language" option when entering source languages (with help text) for unpublished transcriptions because the language will automatically be determined by the document languages already present on the doc detail pages. + - As a content editor, I want clear help text when adding a source to explain how to select the source language, so that it is done consistently for translations and transcriptions. + - As a content admin, I want both dates on document and inferred dates to merge when I merge duplicate PGPIDS so no data is lost when cleaning up duplicates. If there are two different dates on documents for the same PGPID, I want there to be an error message drawing my attention to the issue so I can choose the correct date or otherwise record the discrepancy. + - As a content editor, I want a way to filter documents by date in the admin for enhanced csv exports + - bugfix: Mixed inlines/formsets breaks on lack of permissions + - bugfix: Merging two documents with digital content footnotes for the same source results in unique constraint violation + +- people and places + - As a content editor, I want a separate field to record people's names and roles in each document, so that I can build a structured dataset of all people across the PGP. + - As a content editor, I want a separate field in the document detail page so that I can record place information mentioned in the document. + - As a content editor, I want Person-Person relationship types visually sorted into their categories in the admin form, so that I can select them at a glance. + - As a content admin, when adding people-to-people relationships in person pages, I want an added "ambiguity" category to the drop down so I can clarify when people are similar/not the same. + - As a content admin, when viewing people-to-people relationships in person pages, I want reverse relationships to be visible, so that I don't inadvertently add a relationship twice. + 4.14.2 ------ diff --git a/DEPLOYNOTES.md b/DEPLOYNOTES.md index 2f948ff6a..16cb70d27 100644 --- a/DEPLOYNOTES.md +++ b/DEPLOYNOTES.md @@ -1,5 +1,10 @@ # Deploy Notes +## 4.15 + +- The minimum required Solr version has been bumped to 9.2. Please upgrade to this version, + update Solr configset, and then reindex all content with `python manage.py index`. + ## 4.14 - Seleucid calendar conversion is now implemented, so automatic conversion should be applied diff --git a/README.rst b/README.rst index 45b51a7b2..855d9608b 100644 --- a/README.rst +++ b/README.rst @@ -6,7 +6,7 @@ Princeton Geniza Project Python/Django web application for a version 4.x of the `Princeton Geniza Project `_. -Python 3.9 / Django 3.2 / Node 16 / Postgresql / Solr 8.6 +Python 3.9 / Django 3.2 / Node 16 / Postgresql / Solr 9.2 .. image:: https://zenodo.org/badge/DOI/10.5281/zenodo.7347726.svg :target: https://doi.org/10.5281/zenodo.7347726 diff --git a/docs/conf.py b/docs/conf.py index e7d4a31d6..0a0b57840 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -105,3 +105,6 @@ "get_.*_display", # django auto-generated method for choice fields "get_doc_relation_list", # multiselectfield auto method ] + +# Disable Sphinx 7.2+ coverage statistics, as this breaks CI +coverage_statistics_to_report = coverage_statistics_to_stdout = False diff --git a/geniza/__init__.py b/geniza/__init__.py index fa075f330..2f64cd956 100644 --- a/geniza/__init__.py +++ b/geniza/__init__.py @@ -1,4 +1,4 @@ -__version_info__ = (4, 14, 2, None) +__version_info__ = (4, 15, 0, None) # Dot-connect all but the last. Last is dash-connected if not None. diff --git a/geniza/annotations/tests/test_annotations_views.py b/geniza/annotations/tests/test_annotations_views.py index 2474228c5..4b51fca10 100644 --- a/geniza/annotations/tests/test_annotations_views.py +++ b/geniza/annotations/tests/test_annotations_views.py @@ -1,23 +1,20 @@ -import ast import json import uuid -from unittest.mock import patch import pytest from django.contrib.admin.models import ADDITION, CHANGE, DELETION, LogEntry +from django.contrib.contenttypes.models import ContentType from django.urls import reverse -from parasolr.django.indexing import ModelIndexable from pytest_django.asserts import assertContains, assertNotContains from geniza.annotations.models import Annotation from geniza.annotations.views import AnnotationResponse from geniza.corpus.models import Document -from geniza.footnotes.models import Footnote, Source +from geniza.footnotes.models import Footnote, Source, SourceType @pytest.mark.django_db class TestAnnotationList: - anno_list_url = reverse("annotations:list") def test_get_annotation_list(self, client, annotation): @@ -127,6 +124,9 @@ def test_create_annotation(self, admin_client, document, source, annotation_json ) # will raise error if digital edition footnote does not exist footnote = document.digital_editions().get(source=source) + # since there was no corresponding Edition footnote with a Location, the + # resulting digital footnote should not get a location + assert not footnote.location # should log action assert LogEntry.objects.filter( @@ -136,7 +136,6 @@ def test_create_annotation(self, admin_client, document, source, annotation_json @pytest.mark.django_db class TestAnnotationDetail: - anno_list_url = reverse("annotations:list") def test_get_annotation_detail(self, client, annotation): @@ -325,6 +324,44 @@ def test_delete_last_translation_anno(self, admin_client, translation_annotation assert footnote.annotation_set.count() == 0 assert Footnote.DIGITAL_TRANSLATION not in footnote.doc_relation + def test_corresponding_footnote_location(self, admin_client, document): + document_contenttype = ContentType.objects.get_for_model(Document) + # create an Edition footnote on the document and source + book = SourceType.objects.create(type="Book") + source = Source.objects.create(source_type=book) + Footnote.objects.create( + doc_relation=[Footnote.EDITION], + object_id=document.pk, + content_type=document_contenttype, + source=source, + location="doc. 123", + ) + # POST JSON to create a new annotation on the document and source + anno_dict = { + "body": [{"value": "new text"}], + "target": { + "source": { + "partOf": {"id": document.manifest_uri}, + } + }, + "dc:source": source.uri, + "motivation": "transcribing", + } + admin_client.post( + self.anno_list_url, + json.dumps(anno_dict), + content_type="application/json", + ) + # should not raise error because digital edition created by request + created_digital_edition = Footnote.objects.get( + doc_relation=[Footnote.DIGITAL_EDITION], + source__pk=source.pk, + content_type=document_contenttype, + object_id=document.pk, + ) + # should have its location copied from the existing Edition footnote + assert created_digital_edition.location == "doc. 123" + @pytest.mark.django_db class TestAnnotationSearch: diff --git a/geniza/annotations/views.py b/geniza/annotations/views.py index 70e784da5..212eda569 100644 --- a/geniza/annotations/views.py +++ b/geniza/annotations/views.py @@ -61,8 +61,10 @@ def parse_annotation_data(request): # determine if this is a transcription or translation if "motivation" in json_data and "translating" in json_data["motivation"]: doc_relation = Footnote.DIGITAL_TRANSLATION + corresponding_relation = Footnote.TRANSLATION else: doc_relation = Footnote.DIGITAL_EDITION + corresponding_relation = Footnote.EDITION # find or create DIGITAL_EDITION footnote for this source and document try: @@ -74,11 +76,32 @@ def parse_annotation_data(request): ) except Footnote.DoesNotExist: source = Source.objects.get(pk=source_id) + + # try to find a corresponding non-digital footnote for location field + # (i.e. Translation for Digital Translation, Edition for Digital Edition) + # NOTE: assumes that if there is exactly one non-digital footnote for this source, then + # the digital content is coming from the same location + try: + # use .get to ensure there is exactly one corresponding; + # otherwise ambiguous which location to use + corresponding_footnote = Footnote.objects.exclude(location="").get( + doc_relation__contains=corresponding_relation, + source__pk=source_id, + content_type=document_contenttype, + object_id=document_id, + ) + location = corresponding_footnote.location + except (Footnote.DoesNotExist, Footnote.MultipleObjectsReturned): + # if there are 0 or > 1 footnotes, location should be blank + location = "" + + # create a new digital footnote footnote = Footnote.objects.create( source=source, doc_relation=[doc_relation], object_id=document_id, content_type=document_contenttype, + location=location, ) LogEntry.objects.log_action( user_id=request.user.id, diff --git a/geniza/common/models.py b/geniza/common/models.py index 9c6960e57..4d8b2a019 100644 --- a/geniza/common/models.py +++ b/geniza/common/models.py @@ -1,6 +1,17 @@ +from functools import cache + from django.contrib.auth.models import User from django.db import models from django.utils.safestring import mark_safe +from modeltranslation.utils import fallbacks + + +def cached_class_property(f): + """ + Reusable decorator to cache a class property, as opposed to an instance property. + from https://stackoverflow.com/a/71887897 + """ + return classmethod(property(cache(f))) # Create your models here. @@ -60,3 +71,42 @@ class UserProfile(models.Model): def __str__(self): # needed for display label in admin return "User profile for %s" % (self.user) + + +class DisplayLabelMixin: + """ + Mixin for models with translatable display labels that may differ from names, in + order to override fallback behavior when a label for the current language is not defined. + Used for search response handling and display on the public frontend. + + Example: DocumentType with name 'Legal' has a display label in English, 'Legal document'. + In Hebrew, it only has a name 'מסמך משפטי' and no display label. In English, we want to show + DocumentType.display_label_en. In Hebrew, we want to show DocumentType.name_he because + display_label_he is not defined. We also need to ensure that the document type + מסמך משפטי can be looked up by display_label_en, as that is what gets indexed in solr. + """ + + def __str__(self): + # temporarily turn off model translate fallbacks; + # if display label for current language is not defined, + # we want name for the current language rather than the + # fallback value for display label + with fallbacks(False): + current_lang_label = self.display_label or self.name + + return current_lang_label or self.display_label or self.name + + def natural_key(self): + """Natural key, name""" + return (self.name,) + + @classmethod + def objects_by_label(cls): + """A dict of object instances keyed on English display label, used for search form + and search results, which should be based on Solr facet and query responses (indexed in + English).""" + return { + # lookup on display_label_en/name_en since solr should always index in English + (obj.display_label_en or obj.name_en): obj + for obj in cls.objects.all() + } diff --git a/geniza/context_processors.py b/geniza/context_processors.py index ec95f78de..8121ddbf9 100644 --- a/geniza/context_processors.py +++ b/geniza/context_processors.py @@ -18,5 +18,9 @@ def template_globals(request): ), "site": site, "GTAGS_ANALYTICS_ID": getattr(settings, "GTAGS_ANALYTICS_ID", None), + "IS_ARCHIVE_CRAWLER": "archive.org_bot" + in request.META.get("HTTP_USER_AGENT", "") + if hasattr(request, "META") + else False, } return context_extras diff --git a/geniza/corpus/admin.py b/geniza/corpus/admin.py index b9cac8112..f76231010 100644 --- a/geniza/corpus/admin.py +++ b/geniza/corpus/admin.py @@ -7,7 +7,7 @@ from django.contrib.postgres.aggregates import ArrayAgg from django.contrib.postgres.fields import ArrayField from django.core.exceptions import ValidationError -from django.db.models import CharField, Count, F +from django.db.models import CharField, Count, F, Q, TextField from django.db.models.functions import Concat from django.forms.widgets import HiddenInput, Textarea, TextInput from django.http import HttpResponseRedirect @@ -18,6 +18,7 @@ from geniza.annotations.models import Annotation from geniza.common.admin import custom_empty_field_list_filter +from geniza.corpus.dates import DocumentDateMixin from geniza.corpus.metadata_export import AdminDocumentExporter, AdminFragmentExporter from geniza.corpus.models import ( Collection, @@ -30,6 +31,8 @@ ) from geniza.corpus.solr_queryset import DocumentSolrQuerySet from geniza.corpus.views import DocumentMerge +from geniza.entities.admin import PersonInline, PlaceInline +from geniza.entities.models import DocumentPlaceRelation, PersonDocumentRelation from geniza.footnotes.admin import DocumentFootnoteInline from geniza.footnotes.models import Footnote @@ -201,6 +204,8 @@ class HasTranslationListFilter(admin.SimpleListFilter): def lookups(self, request, model_admin): return ( ("yes", "Has translation"), + ("yes_en", "Has English translation"), + ("yes_he", "Has Hebrew translation"), ("no", "No translation"), ) @@ -209,12 +214,103 @@ def queryset(self, request, queryset): return queryset.filter( footnotes__doc_relation__contains=Footnote.DIGITAL_TRANSLATION ) + # Filters for English and Hebrew translations: + # In order to find documents with footnotes that satisfy both conditions, we need to make + # a second query within the first, per Django docs ("Spanning multi-valued relationships") + if self.value() == "yes_en": + return queryset.filter( + footnotes__in=Footnote.objects.filter( + doc_relation__contains=Footnote.DIGITAL_TRANSLATION, + source__languages__name="English", + ), + ) + if self.value() == "yes_he": + return queryset.filter( + footnotes__in=Footnote.objects.filter( + doc_relation__contains=Footnote.DIGITAL_TRANSLATION, + source__languages__name="Hebrew", + ), + ) if self.value() == "no": return queryset.exclude( footnotes__doc_relation__contains=Footnote.DIGITAL_TRANSLATION ) +class TextInputListFilter(admin.SimpleListFilter): + """ + Custom list filter class for text input, adapted from this solution by Haki Benita: + https://hakibenita.com/how-to-add-a-text-filter-to-django-admin + """ + + template = "admin/corpus/text_input_filter.html" + + def lookups(self, request, model_admin): + # Dummy, required to show the filter. + return ((),) + + def choices(self, changelist): + # Grab only the "all" option. + all_choice = next(super().choices(changelist)) + all_choice["query_parts"] = ( + (k, v) + for k, v in changelist.get_filters_params().items() + if k != self.parameter_name + ) + yield all_choice + + +class DateListFilter(TextInputListFilter): + """Admin date range filter for documents, using Solr queryset""" + + def queryset(self, request, queryset): + """Get the filtered queryset based on date range filter input""" + if self.value() is not None: + date = self.value() + + # exclude any results that don't have a date or dating + queryset = queryset.exclude( + Q(dating__isnull=True) & Q(doc_date_standard="") + ) + + # get all before "to date" if we're using DateBeforeListFilter, + # otherwise get all after "from date" + date_filter = ( + ("[* TO %s]" % date) + if self.parameter_name == "date__lte" + else ("[%s TO *]" % date) + ) + + # use Solr to take advantage of processed date range fields + sqs = ( + DocumentSolrQuerySet() + .filter(document_date_dr=date_filter) + .only("pgpid") + .get_results(rows=100000) + ) + # filter queryset by id if there are results + pks = [r["pgpid"] for r in sqs] + if sqs: + queryset = queryset.filter(pk__in=pks) + else: + queryset = queryset.none() + if not (DocumentDateMixin.re_date_format.match(date)): + messages.error( + request, "Dates must be in the format YYYY-MM-DD or YYYY." + ) + return queryset + + +class DateAfterListFilter(DateListFilter): + parameter_name = "date__gte" + title = "Date from (CE)" + + +class DateBeforeListFilter(DateListFilter): + parameter_name = "date__lte" + title = "Date to (CE)" + + class DocumentDatingInline(admin.TabularInline): """Inline for inferred dates on a document""" @@ -222,11 +318,26 @@ class DocumentDatingInline(admin.TabularInline): fields = ( "display_date", "standard_date", + "rationale", "notes", ) min_num = 0 extra = 1 - insert_after = "standard_date" + formfield_overrides = { + TextField: {"widget": Textarea(attrs={"rows": 4})}, + } + + +class DocumentPersonInline(PersonInline): + """Inline for people related to a document""" + + model = PersonDocumentRelation + + +class DocumentPlaceInline(PlaceInline): + """Inline for places related to a document""" + + model = DocumentPlaceRelation @admin.register(Document) @@ -290,34 +401,76 @@ def view_old_pgpids(self, obj): custom_empty_field_list_filter("review status", "Needs review", "OK"), ), "status", + DateAfterListFilter, + DateBeforeListFilter, ("textblock__fragment__collection", admin.RelatedOnlyFieldListFilter), ("languages", admin.RelatedOnlyFieldListFilter), ("secondary_languages", admin.RelatedOnlyFieldListFilter), ) - fields = ( - ("shelfmark", "id", "view_old_pgpids"), - "shelfmark_override", - "doctype", - ("languages", "secondary_languages"), - "language_note", - "description", + # organize into fieldsets so that we can insert inlines mid-form + fieldsets = ( ( - "doc_date_original", - "doc_date_calendar", - "doc_date_standard", - "standard_date", + None, + { + "fields": ( + ("shelfmark", "id", "view_old_pgpids"), + "shelfmark_override", + "doctype", + ("languages", "secondary_languages"), + "language_note", + "description", + ) + }, + ), + ( + None, + { + "fields": ( + ( + "doc_date_original", + "doc_date_calendar", + "doc_date_standard", + "standard_date", + ), + ), + }, + ), + ( + None, + { + "fields": ( + "tags", + "status", + ("needs_review", "notes"), + "image_order_override", + "admin_thumbnails", + ) + }, ), - "tags", - "status", - ("needs_review", "notes"), - "image_order_override", - "admin_thumbnails", # edition, translation ) autocomplete_fields = ["languages", "secondary_languages"] # NOTE: autocomplete does not honor limit_choices_to in model - inlines = [DocumentTextBlockInline, DocumentFootnoteInline, DocumentDatingInline] + inlines = [ + DocumentDatingInline, + DocumentTextBlockInline, + DocumentFootnoteInline, + DocumentPersonInline, + DocumentPlaceInline, + ] + # mixed fieldsets and inlines: /templates/admin/snippets/mixed_inlines_fieldsets.html + fieldsets_and_inlines_order = ( + "f", # shelfmark, languages, description fieldset + "f", # date on document fieldset + "i", # DocumentDatingInline + "f", # tags, status, order override fieldset + "itt", # images/transcription/translation panel + "i", # DocumentTextBlockInline + "i", # DocumentFootnoteInline + "i", # DocumentPersonInline + "i", # DocumentPlaceInline + ) class Media: css = {"all": ("css/admin-local.css",)} @@ -407,13 +560,29 @@ def save_model(self, request, obj, form, change): super().save_model(request, obj, form, change) def change_view(self, request, object_id, form_url="", extra_context=None): - """Customize this model's change_view to add IIIF images to context for - transcription viewer, then execute existing change_view""" + """Customize this model's change_view to add IIIF images and default/disabled panels + to context for transcription/translation viewer, then execute existing change_view + """ extra_ctx = extra_context or {} document = self.get_object(request, object_id) if document: + # get images images = document.iiif_images(with_placeholders=True) - extra_ctx.update({"images": images}) + # get available digital content panels + available_panels = document.available_digital_content + extra_ctx.update( + { + "images": images, + # show first two panels by default + "default_shown": available_panels[:2], + # disable any unavailable panels + "disabled": [ + panel + for panel in ["images", "translation", "transcription"] + if panel not in available_panels + ], + } + ) return super().change_view( request, object_id, form_url, extra_context=extra_ctx ) @@ -461,7 +630,8 @@ def history_view(self, request, object_id, extra_context=None): @admin.display(description="Merge selected documents") def merge_documents(self, request, queryset=None): """Admin action to merge selected documents. This action redirects to an intermediate - page, which displays a form to review for confirmation and choose the primary document before merging.""" + page, which displays a form to review for confirmation and choose the primary document before merging. + """ # Functionality drawn from https://github.com/Princeton-CDH/mep-django/blob/main/mep/people/admin.py # NOTE: using selected ids from form and ignoring queryset diff --git a/geniza/corpus/apps.py b/geniza/corpus/apps.py index 69ed8dc8e..e06da7f8f 100644 --- a/geniza/corpus/apps.py +++ b/geniza/corpus/apps.py @@ -1,5 +1,5 @@ from django.apps import AppConfig -from django.db.models.signals import pre_save +from django.db.models.signals import m2m_changed, pre_save class CorpusAppConfig(AppConfig): @@ -12,4 +12,7 @@ def ready(self): from geniza.corpus.models import TagSignalHandlers pre_save.connect(TagSignalHandlers.unidecode_tag, sender="taggit.Tag") + m2m_changed.connect( + TagSignalHandlers.tagged_item_change, sender="taggit.TaggedItem" + ) return super().ready() diff --git a/geniza/corpus/management/commands/googledoc_to_annotation.py b/geniza/corpus/management/commands/googledoc_to_annotation.py new file mode 100644 index 000000000..1b4cd91bf --- /dev/null +++ b/geniza/corpus/management/commands/googledoc_to_annotation.py @@ -0,0 +1,426 @@ +""" +Script to convert translation content from Google Docs template +to IIIF annotations in the configured annotation server. This is +a one-time script intended to import Lieberman translations. + +Intended to be run manually from the shell as follows: +./manage.py googledoc_to_annotation + -d GOOGLE_DRIVE_DRIVE_ID + -f GOOGLE_DRIVE_FOLDER_ID + -i GOOGLE_DRIVE_CLIENT_ID + -s GOOGLE_DRIVE_CLIENT_SECRET + +Adapted from tei_to_annotation management command. +""" + +import io +import re +import unicodedata +from collections import defaultdict + +from addict import Dict +from bs4 import BeautifulSoup +from django.conf import settings +from django.contrib.auth.models import User +from django.db import IntegrityError +from django.template.defaultfilters import pluralize +from django.utils import timezone +from google_auth_oauthlib.flow import InstalledAppFlow +from googleapiclient.discovery import build +from googleapiclient.errors import HttpError +from googleapiclient.http import MediaIoBaseDownload +from parasolr.django.signals import IndexableSignalHandler +from rich.progress import MofNCompleteColumn, Progress + +from geniza.annotations.models import Annotation +from geniza.corpus.annotation_export import AnnotationExporter +from geniza.corpus.management.commands import tei_to_annotation +from geniza.corpus.models import Document +from geniza.footnotes.models import Footnote, Source + + +class Command(tei_to_annotation.Command): + """Synchronize Google Doc translations to digital translation footnote content""" + + v_normal = 1 # default verbosity + + normalized_unicode = set() + document_not_found = [] + source_not_found = [] + bad_file = [] + footnote_exists = [] + + REMOVE_ATTRIBUTES = ["style", "class"] # attributes to strip from html elements + + def add_arguments(self, parser): + parser.add_argument( + "-d", + "--drive_id", + required=True, + help="The ID of the shared Google Drive for the PGP.", + ) + parser.add_argument( + "-f", + "--folder_id", + required=True, + help="The ID of the Google Drive folder containing translation documents.", + ) + parser.add_argument( + "-i", + "--client_id", + required=True, + help="The Google Drive API client ID with the permission to see and download documents.", + ) + parser.add_argument( + "-s", + "--client_secret", + required=True, + help="The Google Drive API client secret matching the ID.", + ) + + def handle(self, *args, **options): + self.verbosity = options["verbosity"] + self.stats = defaultdict(int) + self.stats["files"] = 0 + self.stats["created"] = 0 + self.script_run_start = timezone.now() + + # get script user for log entries + self.script_user = User.objects.get(username=settings.SCRIPT_USERNAME) + + # get drive and folder id from provided args + self.drive_id = options["drive_id"] + self.folder_id = options["folder_id"] + + # disconnect solr indexing signals + IndexableSignalHandler.disconnect() + + # initialize annotation exporter; don't push changes until the end + self.anno_exporter = AnnotationExporter( + stdout=self.stdout, + verbosity=options["verbosity"], + push_changes=False, + ) + self.anno_exporter.setup_repo() + + # use rich progressbar without context manager + progress = Progress( + MofNCompleteColumn(), *Progress.get_default_columns(), expand=True + ) + progress.start() + fetch_task = progress.add_task("Fetching list of files...", total=None) + + try: + # create drive api client (https://developers.google.com/drive/api/quickstart/python) + self.service = build( + "drive", + "v3", + credentials=self.get_credentials( + client_id=options["client_id"], + client_secret=options["client_secret"], + ), + ) + + # get list of all files + files = self.list_all_files() + n = len(files) + progress.update( + fetch_task, completed=n, total=n, description=f"Found {n} files." + ) + + # loop through each file, and process it + process_task = progress.add_task(f"Processing...", total=n) + for file in files: + if self.verbosity > self.v_normal: + print(f"Processing {file.get('name')}") + self.stats["files"] += 1 + html_file = self.download_as_html(file.get("id")) + self.process_file(file.get("name"), html_file) + progress.update(process_task, advance=1, update=True) + + except HttpError as error: + self.style.ERROR(f"An error occurred: {error}") + + progress.refresh() + progress.stop() + + print( + "Processed %(files)d Google Doc(s). \nCreated %(created)d annotation(s)." + % self.stats + ) + + # push all changes from import to github + self.anno_exporter.sync_github() + + # report on missing sources + if self.source_not_found: + print( + "Could not find footnotes for %s document%s:" + % (len(self.source_not_found), pluralize(self.source_not_found)) + ) + for source in self.source_not_found: + print("\t%s" % source) + + # report on unicode normalization + if self.normalized_unicode: + print( + "Normalized unicode for %s document%s:" + % (len(self.normalized_unicode), pluralize(self.normalized_unicode)) + ) + for doc in self.normalized_unicode: + print("\t%s" % doc) + + # report on documents not found + if self.document_not_found: + print( + "Document not found for %s PGPID%s:" + % (len(self.document_not_found), pluralize(self.document_not_found)) + ) + for doc in self.document_not_found: + print("\t%s" % doc) + + if self.bad_file: + print( + "Bad formatting for %s file%s (skipped):" + % (len(self.bad_file), pluralize(self.bad_file)) + ) + for doc in self.bad_file: + print("\t%s" % doc) + + if self.footnote_exists: + print( + "Footnote already exists for %s file%s (skipped):" + % (len(self.footnote_exists), pluralize(self.footnote_exists)) + ) + for doc in self.footnote_exists: + print("\t%s" % doc) + + def get_credentials(self, client_id, client_secret): + SCOPES = ["https://www.googleapis.com/auth/drive.readonly"] + # Start the authorization flow based on the API ID and secret, which will prompt the user + # to log in. Once the auth flow is completed, return the generated credentials. + flow = InstalledAppFlow.from_client_config( + { + "installed": { + "client_id": client_id, + "project_id": "geniza-ingest", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_secret": client_secret, + "redirect_uris": ["http://localhost"], + } + }, + SCOPES, + ) + creds = flow.run_local_server(port=0) + return creds + + def list_all_files(self): + # adapted from https://developers.google.com/drive/api/guides/search-files + files = [] + page_token = None + # loop through all pages until there are no more new pages + while True: + # find all files in the folder folder_id and drive drive_id + response = ( + # see https://developers.google.com/drive/api/reference/rest/v3/files/list + self.service.files() + .list( + # limit to files within this subfolder and not in the trash + q=f"'{self.folder_id}' in parents and trashed=false", + driveId=self.drive_id, + includeItemsFromAllDrives=True, # required for shared drives + corpora="drive", # include all files in drive, not just current user's + supportsAllDrives=True, # required for files.list + pageToken=page_token, # current page + ) + .execute() + ) + # add files to list + files += response.get("files", []) + # go to the next page if there's a next page token, otherwise end the loop + page_token = response.get("nextPageToken", None) + if page_token is None: + break + return files + + def download_as_html(self, file_id): + # export file to HTML + request = self.service.files().export_media( + fileId=file_id, mimeType="text/html" + ) + file = io.BytesIO() + + # start the download, then loop through all chunks until file is downloaded + downloader = MediaIoBaseDownload(file, request) + done = False + while done is False: + _, done = downloader.next_chunk() + + return file.getvalue() + + def new_translation_annotation(self): + # initialize a new annotation dict object with all the defaults set + + anno = Dict() + setattr(anno, "@context", "http://www.w3.org/ns/anno.jsonld") + anno.type = "Annotation" + anno.body = [Dict()] + anno.body[0].type = "TextualBody" + anno.body[0].format = "text/html" + # supplement rather than painting over the image + # add translating as secondary motivation + anno.motivation = ["sc:supplementing", "translating"] + + anno.target.source.type = "Canvas" + anno.target.selector.type = "FragmentSelector" + anno.target.selector.conformsTo = "http://www.w3.org/TR/media-frags/" + + return anno + + def process_file(self, name, html_file): + soup = BeautifulSoup(html_file, "html.parser") + # first table is metadata, second table is translation + tables = soup.find_all("table") + if not tables or len(tables) != 2 or not tables[0].find("td"): + print( + self.style.WARNING( + f"The file {name} does not match the expected format; skipping" + ) + ) + self.bad_file.append(name) + return + # extract the footnote metadata + metadata = tables[0].find_all("td") + (pgpid, source_id, location) = (td.get_text() for td in metadata[3:]) + + # check if doc name mismatched with pgpid in metadata table; use doc name if so + pgpid_match = re.search("PGPID (?P\d+)", name) + if pgpid_match and pgpid_match.group("pgpid") != pgpid: + pgpid = pgpid_match.group("pgpid") + + # get the document + try: + doc = Document.objects.get(pk=int(pgpid)) + except Document.DoesNotExist: + print( + self.style.WARNING("Document not found for PGPID %s; skipping" % pgpid) + ) + self.document_not_found.append(pgpid) + return + + # get the source + try: + source = Source.objects.get(pk=int(source_id)) + except Source.DoesNotExist: + print( + self.style.WARNING( + f"Source not found for Source ID {source_id} (on PGPID {pgpid}); skipping" + ) + ) + self.source_not_found.append(source_id) + return + + # get the first canvas only; researchers want all translation content on first img + canvas_base_uri = doc.manifest_uri.replace("manifest", "canvas") + iiif_canvas = next( + iter(doc.iiif_images(filter_side=True)), + f"{canvas_base_uri}1/", # default in case there are no images + ) + + # get or create a digital translation footnote + try: + footnote = Footnote.objects.create( + object_id=doc.pk, + content_type=self.get_content_type(doc), + source=source, + doc_relation=Footnote.DIGITAL_TRANSLATION, + ) + except IntegrityError: + print( + self.style.WARNING(f"The footnote for {name} already exists; skipping") + ) + self.footnote_exists.append(name) + return + footnote.location = location + footnote.save() + # log creation + self.log_addition( + footnote, + "Created new footnote for imported Google Docs digital translation", + ) + + # extract, process, and create annotations from the translation blocks + translation = tables[1] + tds = translation.find_all("td") + + # loop through blocks: each 2-column row is a block, first 2 cells are headers + blocks = (len(tds) - 2) / 2 + for i in range(int(blocks)): + # new row every 2 cells; starts at cell 3 because first 2 cells are column headers + row_start = (i * 2) + 2 + # extract the annotation block label (first column) + block_label = tds[row_start].get_text() + # extract the translation (second column) + translation_cell = tds[row_start + 1] + # first, process and cleanup all the tags + for tag in translation_cell.descendants: + try: + # remove unneeded attributes + tag.attrs = { + key: value + for (key, value) in tag.attrs.items() + if key not in self.REMOVE_ATTRIBUTES + } + # remove unnecessary spans that Google Docs adds to each li + for span in tag.find_all("span"): + span.unwrap() + # clear out any unneeded newlines exposed by the previous step + tag.smooth() + except AttributeError: + # 'NavigableString' object has no attribute 'attrs'; fine to ignore + pass + html = "\n".join([str(tag) for tag in translation_cell.contents]) + + # create an annotation + anno = self.new_translation_annotation() + + # place on the first canvas + anno.target.source.id = iiif_canvas + # apply to the full canvas using % notation + # (using nearly full canvas to make it easier to edit zones) + anno.target.selector.value = "xywh=percent:1,1,98,98" + + # add html and optional label to annotation text body + if not unicodedata.is_normalized("NFC", html): + self.normalized_unicode.add(pgpid) + html = unicodedata.normalize("NFC", html) + anno.body[0].value = html + + # check if label text requires normalization + if not unicodedata.is_normalized("NFC", block_label): + self.normalized_unicode.add(pgpid) + block_label = unicodedata.normalize("NFC", block_label) + # add label to annotation + anno.body[0].label = block_label + + # order according to block number + anno["schema:position"] = i + 1 + + # create database annotation + db_anno = Annotation() + db_anno.set_content(dict(anno)) + # link to digital translation footnote + db_anno.footnote = footnote + db_anno.save() + # log entry to document annotation creation + self.log_addition(db_anno, "Imported from Google Doc translation") + + self.stats["created"] += 1 + + # export html/txt translation files to github backup + self.anno_exporter.export( + pgpids=[doc.pk], + commit_msg="Translation imported from Google Doc - PGPID %d" % doc.pk, + ) diff --git a/geniza/corpus/management/commands/tei_to_annotation.py b/geniza/corpus/management/commands/tei_to_annotation.py index 061e849ac..8827da3e4 100644 --- a/geniza/corpus/management/commands/tei_to_annotation.py +++ b/geniza/corpus/management/commands/tei_to_annotation.py @@ -28,7 +28,6 @@ from rich.progress import MofNCompleteColumn, Progress from geniza.annotations.models import Annotation -from geniza.annotations.signals import disconnect_signal_handlers from geniza.common.utils import absolutize_url from geniza.corpus.annotation_export import AnnotationExporter from geniza.corpus.management.commands import sync_transcriptions @@ -64,8 +63,6 @@ def handle(self, *args, **options): # disconnect solr indexing signals IndexableSignalHandler.disconnect() - # disconnect annotation signal handlers - disconnect_signal_handlers() # make sure we have latest tei content from git repository # (inherited from sync transcriptions command) diff --git a/geniza/corpus/metadata_export.py b/geniza/corpus/metadata_export.py index 76369c7b9..d1dec2425 100644 --- a/geniza/corpus/metadata_export.py +++ b/geniza/corpus/metadata_export.py @@ -33,6 +33,10 @@ class DocumentExporter(Exporter): "doc_date_original", "doc_date_calendar", "doc_date_standard", + "inferred_date_display", + "inferred_date_standard", + "inferred_date_rationale", + "inferred_date_notes", "initial_entry", "last_modified", "input_by", @@ -74,6 +78,7 @@ def get_queryset(self): "secondary_languages", "log_entries", "log_entries__user", + "dating_set", Prefetch( "footnotes", queryset=Footnote.objects.select_related( @@ -126,6 +131,7 @@ def get_export_data_dict(self, doc): for fragment in all_fragments if fragment.collection ] + datings = doc.dating_set.all() outd = {} outd["pgpid"] = doc.id @@ -158,6 +164,18 @@ def get_export_data_dict(self, doc): outd["doc_date_original"] = doc.doc_date_original outd["doc_date_calendar"] = doc.get_doc_date_calendar_display() outd["doc_date_standard"] = doc.doc_date_standard + outd["inferred_date_display"] = [ + dating.display_date for dating in datings if dating.display_date + ] + outd["inferred_date_standard"] = [ + dating.standard_date for dating in datings if dating.standard_date + ] + outd["inferred_date_rationale"] = [ + dating.get_rationale_display() for dating in datings if dating.notes + ] + outd["inferred_date_notes"] = [ + dating.notes for dating in datings if dating.notes + ] # default sort is most recent first, so initial input is last # convert to list so we can do negative indexing, instead of calling last() diff --git a/geniza/corpus/migrations/0041_dating_rationale.py b/geniza/corpus/migrations/0041_dating_rationale.py new file mode 100644 index 000000000..11ec40f89 --- /dev/null +++ b/geniza/corpus/migrations/0041_dating_rationale.py @@ -0,0 +1,36 @@ +# Generated by Django 3.2.16 on 2023-06-22 17:45 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("corpus", "0040_tag_merge_permissions"), + ] + + operations = [ + migrations.AddField( + model_name="dating", + name="rationale", + field=models.CharField( + choices=[ + ("PA", "Paleography"), + ("PE", "Person mentioned"), + ("E", "Event mentioned"), + ("C", "Coinage"), + ("O", "Other (please specify)"), + ], + default="O", + help_text="An explanation for how this date was inferred", + max_length=2, + ), + ), + migrations.AlterField( + model_name="dating", + name="notes", + field=models.TextField( + help_text="Optional further details about the rationale" + ), + ), + ] diff --git a/geniza/corpus/models.py b/geniza/corpus/models.py index 6afa02646..828d5f64b 100644 --- a/geniza/corpus/models.py +++ b/geniza/corpus/models.py @@ -2,7 +2,7 @@ import re from collections import defaultdict from copy import deepcopy -from functools import cache, cached_property +from functools import cached_property from itertools import chain from django.conf import settings @@ -28,7 +28,6 @@ from django.utils.translation import gettext as _ from djiffy.models import Manifest from modeltranslation.manager import MultilingualQuerySet -from modeltranslation.utils import fallbacks from parasolr.django.indexing import ModelIndexable from piffle.image import IIIFImageClient from piffle.presentation import IIIFException, IIIFPresentation @@ -38,25 +37,21 @@ from urllib3.exceptions import HTTPError, NewConnectionError from geniza.annotations.models import Annotation -from geniza.common.models import TrackChangesModel +from geniza.common.models import ( + DisplayLabelMixin, + TrackChangesModel, + cached_class_property, +) from geniza.common.utils import absolutize_url from geniza.corpus.annotation_utils import document_id_from_manifest_uri from geniza.corpus.dates import DocumentDateMixin from geniza.corpus.iiif_utils import GenizaManifestImporter, get_iiif_string from geniza.corpus.solr_queryset import DocumentSolrQuerySet -from geniza.footnotes.models import Creator, Footnote, Source +from geniza.footnotes.models import Creator, Footnote logger = logging.getLogger(__name__) -def cached_class_property(f): - """ - Reusable decorator to cache a class property, as opposed to an instance property. - from https://stackoverflow.com/a/71887897 - """ - return classmethod(property(cache(f))) - - class CollectionManager(models.Manager): """Custom manager for :class:`Collection` with natural key lookup""" @@ -382,7 +377,7 @@ def get_by_natural_key(self, name): return self.get(name_en=name) -class DocumentType(models.Model): +class DocumentType(DisplayLabelMixin, models.Model): """Controlled vocabulary of document types.""" name = models.CharField(max_length=255, unique=True) @@ -391,31 +386,11 @@ class DocumentType(models.Model): blank=True, help_text="Optional label for display on the public site", ) - objects = DocumentTypeManager() - def __str__(self): - # temporarily turn off model translate fallbacks; - # if display label for current language is not defined, - # we want name for the current language rather than the - # fallback value for display label - with fallbacks(False): - current_lang_label = self.display_label or self.name - - return current_lang_label or self.display_label or self.name - - def natural_key(self): - """Natural key, name""" - return (self.name,) - @cached_class_property def objects_by_label(cls): - """A dict of DocumentType object instances keyed on English display label""" - return { - # lookup on display_label_en/name_en since solr should always index in English - (doctype.display_label_en or doctype.name_en): doctype - for doctype in cls.objects.all() - } + return super().objects_by_label() class DocumentSignalHandlers: @@ -428,6 +403,7 @@ class DocumentSignalHandlers: "fragment": "fragments", "tag": "tags", "document type": "doctype", + "tagged item": "tagged_items", "Related Fragment": "textblock", # textblock verbose name "footnote": "footnotes", "source": "footnotes__source", @@ -485,6 +461,14 @@ def unidecode_tag(sender, instance, **kwargs): """Convert saved tags to ascii, stripping diacritics.""" instance.name = unidecode(instance.name) + @staticmethod + def tagged_item_change(sender, instance, action, **kwargs): + """Ensure document (=instance) is indexed after the tags m2m relationship is saved and the + list of tags is pulled from the database, on any tag change.""" + if action in ["post_add", "post_remove", "post_clear"]: + logger.debug("taggit.TaggedItem %s, reindexing related document", action) + ModelIndexable.index_items(Document.objects.filter(pk=instance.pk)) + class DocumentQuerySet(MultilingualQuerySet): def metadata_prefetch(self): @@ -921,6 +905,21 @@ def has_digital_content(self): ] ) + @property + def available_digital_content(self): + """Helper method for the ITT viewer to collect all available panels into a list""" + + # NOTE: this is ordered by priority, with images first, then translations over + # transcriptions. + available_panels = [] + if self.has_image(): + available_panels.append("images") + if self.has_translation(): + available_panels.append("translation") + if self.has_transcription(): + available_panels.append("transcription") + return available_panels + @property def title(self): """Short title for identifying the document, e.g. via search.""" @@ -956,6 +955,16 @@ def digital_translations(self): doc_relation__contains=Footnote.DIGITAL_TRANSLATION ).order_by("source") + @property + def default_translation(self): + """The first translation footnote that is in the current language, or the first + translation footnote ordered alphabetically by source if one is not available + in the current language.""" + + translations = self.digital_translations() + in_language = translations.filter(source__languages__code=get_language()) + return in_language.first() or translations.first() + def digital_footnotes(self): """All footnotes for this document where the document relation includes digital edition or digital translation.""" @@ -1198,7 +1207,8 @@ def index_data(self): try: last_log_entry = list(self.log_entries.all())[-1] except IndexError: - # should only occur in unit tests, not real data + # occurs in unit tests, and sometimes when new documents are indexed before + # log entry is populated last_log_entry = None if last_log_entry: @@ -1210,6 +1220,14 @@ def index_data(self): index_data[ "input_date_dt" ] = last_log_entry.action_time.isoformat().replace("+00:00", "Z") + elif self.created: + # when log entry not available, use created date on document object + # (will always exist except in some unit tests) + index_data["input_year_i"] = self.created.year + index_data["input_date_dt"] = self.created.isoformat().replace( + "+00:00", "Z" + ) + return index_data # define signal handlers to update the index based on changes @@ -1262,9 +1280,6 @@ def merge_with(self, merge_docs, rationale, user=None): metadata into this document, adds the merged documents into list of old PGP IDs, and creates a log entry documenting the merge, including the rationale.""" - # initialize old pgpid list if previously unset - if self.old_pgpids is None: - self.old_pgpids = [] # if user is not specified, log entry will be associated with # script and document will be flagged for review @@ -1287,10 +1302,58 @@ def merge_with(self, merge_docs, rationale, user=None): needs_review = [self.needs_review] if self.needs_review else [] for doc in merge_docs: - # add merge id to old pgpid list - self.old_pgpids.append(doc.id) + # handle document dates validation before making any changes; + # mismatch should result in exception (caught by DocumentMerge.form_valid) + if ( + ( + # both documents have standard dates, and they don't match + doc.doc_date_standard + and self.doc_date_standard + and self.doc_date_standard != doc.doc_date_standard + ) + or ( + # both documents have original dates, and they don't match + doc.doc_date_original + and self.doc_date_original + and self.doc_date_original != doc.doc_date_original + ) + or ( + # other document has original, this doc has standard, and they don't match + doc.doc_date_original + and self.doc_date_standard + and doc.standardize_date() != self.doc_date_standard + ) + or ( + # other document has standard, this doc has original, and they don't match + doc.doc_date_standard + and self.doc_date_original + and self.standardize_date() != doc.doc_date_standard + ) + ): + raise ValidationError( + "Merged documents must not contain conflicting dates; resolve before merge" + ) + # add any tags from merge document tags to primary doc self.tags.add(*doc.tags.names()) + + # if not in conflict (i.e. missing or exact duplicate), copy dates to result document + if doc.doc_date_standard: + self.doc_date_standard = doc.doc_date_standard + if doc.doc_date_original: + self.doc_date_original = doc.doc_date_original + self.doc_date_calendar = doc.doc_date_calendar + + # add inferred datings (conflicts or duplicates are post-merge + # data cleanup tasks) + for dating in doc.dating_set.all(): + self.dating_set.add(dating) + + # initialize old pgpid list if previously unset + if self.old_pgpids is None: + self.old_pgpids = [] + # add merge id to old pgpid list + self.old_pgpids.append(doc.id) # add description if set and not duplicated # for all supported languages for lang_code in language_codes: @@ -1362,9 +1425,50 @@ def merge_with(self, merge_docs, rationale, user=None): def _merge_footnotes(self, doc): # combine footnotes; footnote logic for merge_with for footnote in doc.footnotes.all(): - # check for match; add new footnote if not a match - equiv_fn = self.footnotes.includes_footnote(footnote) - if not equiv_fn: + # check for match. for each pair of footnotes, there are two possible cases for + # non-equivalence: + # - the footnote to be merged in has annotations + # - there are fields on the footnotes that don't match + # in the former case, merge the two by migrating the annotations from the footnote to + # be merged in to an otherwise matching footnote if there is one; else, add it to doc. + # in the latter case, simply add the footnote to this document. + + if footnote.annotation_set.exists(): + try: + # if the footnote to be merged in has annotations, try to reassign them to an + # otherwise matching footnote to avoid unique constraint violation + self_fn = self.footnotes.get( + # for multiselect field list, need to cast to list to compare + doc_relation__in=list(footnote.doc_relation), + source_id=footnote.source.pk, + ) + # copy over notes, location, url if missing from self_fn + for attr in ["notes", "location", "url"]: + if not getattr(self_fn, attr) and getattr(footnote, attr): + setattr(self_fn, attr, getattr(footnote, attr)) + self_fn.save() + # reassign each annotation's footnote to the footnote on this doc + for annotation in footnote.annotation_set.all(): + annotation.footnote = self_fn + annotation.save() + except Footnote.DoesNotExist: + # if there is no match, we are clear of any unique constaint violation and can + # simply add the footnote to this document + self.footnotes.add(footnote) + elif not self.footnotes.includes_footnote(footnote): + # if there is otherwise not a match, add the footnote to this document + + # first remove any digital doc relations to avoid unique constraint violation; + # footnote should not have such a relation anyway if there are 0 annotations, so + # this would be a data error. + if Footnote.DIGITAL_EDITION in footnote.doc_relation: + footnote.doc_relation.remove(Footnote.DIGITAL_EDITION) + footnote.save() + if Footnote.DIGITAL_TRANSLATION in footnote.doc_relation: + footnote.doc_relation.remove(Footnote.DIGITAL_TRANSLATION) + footnote.save() + + # then add to this document self.footnotes.add(footnote) def _merge_logentries(self, doc): @@ -1503,7 +1607,31 @@ class Meta: max_length=255, validators=[RegexValidator(DocumentDateMixin.re_date_format)], ) - notes = models.TextField( - help_text="An explanation for how this date was inferred, and/or by whom", + PALEOGRAPHY = "PA" + PALEOGRAPHY_LABEL = "Paleography" + PERSON = "PE" + PERSON_LABEL = "Person mentioned" + EVENT = "E" + EVENT_LABEL = "Event mentioned" + COINAGE = "C" + COINAGE_LABEL = "Coinage" + OTHER = "O" + OTHER_LABEL = "Other (please specify)" + RATIONALE_CHOICES = ( + (PALEOGRAPHY, PALEOGRAPHY_LABEL), + (PERSON, PERSON_LABEL), + (EVENT, EVENT_LABEL), + (COINAGE, COINAGE_LABEL), + (OTHER, OTHER_LABEL), + ) + rationale = models.CharField( + max_length=2, + choices=RATIONALE_CHOICES, + default=OTHER, + help_text="An explanation for how this date was inferred", blank=False, + null=False, + ) + notes = models.TextField( + help_text="Optional further details about the rationale", ) diff --git a/geniza/corpus/templates/admin/corpus/document/change_form.html b/geniza/corpus/templates/admin/corpus/document/change_form.html index 67dee2485..fbcaed741 100644 --- a/geniza/corpus/templates/admin/corpus/document/change_form.html +++ b/geniza/corpus/templates/admin/corpus/document/change_form.html @@ -13,20 +13,13 @@ {% render_bundle_csp "admin" "js" attrs='defer' %} {% endblock %} - +{# Render mixed normal and inline fieldsets #} {% block field_sets %} - {% for fieldset in adminform %} - {% include "admin/corpus/document/snippets/fieldset.html" with inline_admin_formsets=inline_admin_formsets %} - {% endfor %} + {% include "admin/snippets/mixed_inlines_fieldsets.html" %} {% endblock %} +{# Remove standard inline rendering #} {% block inline_field_sets %} - {% for inline_admin_formset in inline_admin_formsets %} - {# don't repeat inline formsets that were inserted earlier #} - {% if not inline_admin_formset.opts.insert_after %} - {% include inline_admin_formset.opts.template %} - {% endif %} - {% endfor %} {% endblock %} {% block after_field_sets %} @@ -69,11 +62,4 @@

{% endif %} {% endwith %} - -
-
- - {% include "corpus/snippets/document_transcription.html" with document=original %} -
-
{% endblock %} diff --git a/geniza/corpus/templates/admin/corpus/document/snippets/fieldset.html b/geniza/corpus/templates/admin/corpus/document/snippets/fieldset.html deleted file mode 100644 index 50ad513d4..000000000 --- a/geniza/corpus/templates/admin/corpus/document/snippets/fieldset.html +++ /dev/null @@ -1,40 +0,0 @@ -{% comment %} -modified django admin/includes/fieldset.html to allow inline formset after a field, adapted -from https://linevi.ch/en/django-inline-in-fieldset.html -{% endcomment %} - -
- {% if fieldset.name %}

{{ fieldset.name }}

{% endif %} - {% if fieldset.description %} -
{{ fieldset.description|safe }}
- {% endif %} - {% for line in fieldset %} -
- {% if line.fields|length_is:'1' %}{{ line.errors }}{% endif %} - {% for field in line %} - - {% if not line.fields|length_is:'1' and not field.is_readonly %}{{ field.errors }}{% endif %} - {% if field.is_checkbox %} - {{ field.field }}{{ field.label_tag }} - {% else %} - {{ field.label_tag }} - {% if field.is_readonly %} -
{{ field.contents }}
- {% else %} - {{ field.field }} - {% endif %} - {% endif %} - {% if field.field.help_text %} -
{{ field.field.help_text|safe }}
- {% endif %} -
- {# insert designated inlines after the field #} - {% for inline_admin_formset in inline_admin_formsets %} - {% if inline_admin_formset.opts.insert_after == field.field.name %} - {% include inline_admin_formset.opts.template %} - {% endif %} - {% endfor %} - {% endfor %} - - {% endfor %} -
diff --git a/geniza/corpus/templates/admin/corpus/text_input_filter.html b/geniza/corpus/templates/admin/corpus/text_input_filter.html new file mode 100644 index 000000000..acdc6d47e --- /dev/null +++ b/geniza/corpus/templates/admin/corpus/text_input_filter.html @@ -0,0 +1,30 @@ +{% comment %} + Template for a text input-based list filter, + adapted from Haki Benita https://hakibenita.com/how-to-add-a-text-filter-to-django-admin +{% endcomment %} + +

{{ title }}

+ \ No newline at end of file diff --git a/geniza/corpus/templates/corpus/document_scholarship.html b/geniza/corpus/templates/corpus/document_scholarship.html index 9545d3af4..da0363f08 100644 --- a/geniza/corpus/templates/corpus/document_scholarship.html +++ b/geniza/corpus/templates/corpus/document_scholarship.html @@ -26,19 +26,19 @@

{{ page_title }}

{# Translators: label for included document relations for a single footnote #} {% translate "includes" as includes_text %} - {% if source.list|length > 1 or source.list.0.location or source.list.0.url %} + {% if source.list|has_location_or_url %} {# Translators: label for document relations in list of footnotes #} - {% blocktranslate with relation=source.list.0.doc_relation|lower trimmed %} + {% blocktranslate with relation=source.list|all_doc_relations|lower trimmed %} for {{ relation }} see {% endblocktranslate%} {% else %} {# Translators: label for document relations for one footnote with no location or URL #} - {% blocktranslate with relation=source.list.0.doc_relation|lower trimmed %} + {% blocktranslate with relation=source.list|all_doc_relations|lower trimmed %} includes {{ relation }} {% endblocktranslate%} {% endif %}
- {% if source.list|length > 1 or source.list.0.location or source.list.0.url %} + {% if source.list|has_location_or_url %}
    {% for fn in source.list %} diff --git a/geniza/corpus/templates/corpus/snippets/document_transcription.html b/geniza/corpus/templates/corpus/snippets/document_transcription.html index 0d595670f..c5724707b 100644 --- a/geniza/corpus/templates/corpus/snippets/document_transcription.html +++ b/geniza/corpus/templates/corpus/snippets/document_transcription.html @@ -31,14 +31,14 @@ {# NOTE: inputs must be kept as SIBLINGS of panel-container for CSS selection/controls #} {# images displayed by default; disable if no images are available #} {# translators: label for checkbox toggle to show the images panel for a document #} - + - {# transcription displayed by default; disable if no content is available #} {# translators: label for checkbox toggle to show the transcription panel for a document #} - + + {# translation displayed by default; disable if no content is available #} {# translators: label for checkbox toggle to show the translation panel for a document #} - + {% comment %} @@ -90,13 +90,13 @@ {# dropdown is disabled by default; enable if javascript is active #}