Merge branch 'release/4.17'

Princeton-CDH · Apr 24, 2024 · 886dd8d · 886dd8d
2 parents 782fdd9 + db55161
commit 886dd8d
Show file tree

Hide file tree

Showing 88 changed files with 4,409 additions and 2,107 deletions.
diff --git a/.github/workflows/lighthouse.yml b/.github/workflows/lighthouse.yml
@@ -23,7 +23,7 @@ jobs:
         ports:
           - 8983:8983
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Decrypt encrypted fonts zip
         run: |
           gpg --quiet --batch --yes --decrypt --passphrase="${{ secrets.GPG_PASSPHRASE }}" --output sitemedia/fonts.zip sitemedia/fonts.zip.gpg
@@ -37,11 +37,9 @@ jobs:
           docker exec -d ${{ job.services.solr.id }} cp -r /opt/solr/server/solr/configsets /var/solr/data
           docker exec ${{ job.services.solr.id }} solr create -c geniza -n geniza
       - run: sudo apt install gettext
-      - run: echo "PYTHON_VERSION=$(cat .python-version)" >> $GITHUB_ENV
-      - uses: actions/setup-python@v2
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
-      - uses: actions/cache@v2
+      - uses: actions/setup-python@v5
+        # uses version set in .python-version
+      - uses: actions/cache@v4
         with:
           path: ~/.cache/pip
           key: pip-${{ hashFiles('requirements/*.txt') }}
@@ -53,7 +51,7 @@ jobs:
       - uses: actions/setup-node@v2
         with:
           node-version: 16
-      - uses: actions/cache@v2
+      - uses: actions/cache@v4
         with:
           path: ~/.npm
           key: npm-${{ hashFiles('package-lock.json') }}

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,6 +1,37 @@
 Change Log
 ==========
 
+4.17
+----
+
+- public site
+    - As a public site user, I would like to see date ranges separated with an en-dash (–) instead of an em-dash (—).
+    - As a front end user, I only want to see one document number for a source displayed in the scholarship records on the public site.
+    - As a frontend user, I want to see dating information displayed on document details when available, so that I can find out the time frame of a document when it is known.
+    - bugfix: Double quotes search returning unexpected results
+    - bugfix: Issues with shelfmark scoped search
+    - bugfix: Highlighting context shows entire transcription or translation in search result
+    - bugfix: Transcription search results not always formatted correctly
+    - bugfix: Bracket and other character search is functioning unpredictably
+    - bugfix: Incorrect words are highlighted in complete word quotation search (Hebrew script)
+    - bugfix: Some partial search results in description not boosted by relevancy
+    - chore: accessibility issues flagged by DubBot
+
+- image, transcription, translation viewer/editor
+    - As a transcription editor, I should see an error if I try to update an annotation with out-of-date content so that I don't overwrite someone else's changes.
+    - bugfix: Autofill for source search (when inputting a transcription source) not functioning properly
+
+- admin
+    - As a content editor, I want to record places-to-places relationships on the place page and on the document detail page, so that I can track ambiguity.
+    - As a content admin, I want to drop down a pin on a map and then be able to move the pin around so that I can manually adjust the coordinates of a place before saving the location.
+    - As a content editor, I want there to be a notes field in the places pages so that I can add more detail about places that are hard-to-find.
+    - As a content admin, I want a provenance field on the document detail page so that I can note the origin and acquisition history of fragments when available.
+    - As a content editor, I want clearer help text for the name field of the person page so I know how best to present people's names on their pages.
+    - As a content editor, I would like to see Historic Shelfmark on the Document edit page, to ensure that my work is correct when working with old scholarship.
+    - bugfix: Full shelfmark search for multiple shelfmarks not working in admin
+    - bugfix: Invalid lat/long coordinates are allowed for Places, but don't persist
+    - bugfix: People names are not diacritic neutral when adding them from Document Detail page
+
 4.16.1
 ------
 

diff --git a/DEPLOYNOTES.md b/DEPLOYNOTES.md
@@ -1,5 +1,12 @@
 # Deploy Notes
 
+## 4.17
+
+-   Solr configuration has changed. Ensure Solr configset has been updated
+    and then reindex all content: `python manage.py index`
+-   Configure **MAPTILER_API_TOKEN** in local settings for maps to appear.
+-   Wherever Node versions are managed manually, upgrade NPM to 8.x (at least 8.1.0).
+
 ## 4.16
 
 -   Import Bodleian catalog numbers from a spreadsheet using the script

diff --git a/README.rst b/README.rst
@@ -19,10 +19,6 @@ Python 3.9 / Django 3.2 / Node 16 / Postgresql / Solr 9.2
    :target: https://codecov.io/gh/Princeton-CDH/geniza
    :alt: Code coverage
 
-.. image:: https://requires.io/github/Princeton-CDH/geniza/requirements.svg?branch=main
-     :target: https://requires.io/github/Princeton-CDH/geniza/requirements/?branch=main
-     :alt: Requirements Status
-
 .. image:: https://github.com/Princeton-CDH/geniza/workflows/dbdocs/badge.svg
     :target: https://dbdocs.io/princetoncdh/geniza
     :alt: dbdocs build

diff --git a/geniza/__init__.py b/geniza/__init__.py
@@ -1,4 +1,4 @@
-__version_info__ = (4, 16, 1, None)
+__version_info__ = (4, 17, 0, None)
 
 
 # Dot-connect all but the last. Last is dash-connected if not None.

diff --git a/geniza/admin.py b/geniza/admin.py
@@ -1,3 +1,4 @@
+from django.conf import settings
 from django.contrib import admin
 
 from geniza.corpus.models import Document, Fragment
@@ -29,4 +30,7 @@ def each_context(self, request):
             : self.REVIEW_PREVIEW_MAX
         ]
 
+        # add maptiler token if we have one
+        context["maptiler_token"] = getattr(settings, "MAPTILER_API_TOKEN", "")
+
         return context
diff --git a/geniza/annotations/conftest.py b/geniza/annotations/conftest.py
@@ -62,6 +62,7 @@ def annotation_json(document, source):
                 }
             }
         },
+        "motivation": ["sc:supplementing", "transcribing"],
         "dc:source": source.uri,
     }
 

diff --git a/geniza/annotations/migrations/0006_annotation_block.py b/geniza/annotations/migrations/0006_annotation_block.py
@@ -0,0 +1,23 @@
+# Generated by Django 3.2.16 on 2024-02-07 18:00
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("annotations", "0005_annotation_cleanup_nbsp"),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="annotation",
+            name="block",
+            field=models.ForeignKey(
+                null=True,
+                on_delete=django.db.models.deletion.CASCADE,
+                related_name="lines",
+                to="annotations.annotation",
+            ),
+        ),
+    ]
diff --git a/geniza/annotations/models.py b/geniza/annotations/models.py
@@ -1,3 +1,5 @@
+import hashlib
+import json
 import re
 import uuid
 from collections import defaultdict
@@ -59,15 +61,29 @@ def group_by_manifest(self):
 
 
 class Annotation(TrackChangesModel):
-    """Annotation model for storing annotations in the database."""
+    """Annotation model for storing annotations in the database.
+
+    Annotations may be either block-level or line-level. Block-level annotation is the default;
+    in most cases, a block-level annotation's content is stored as a TextualBody in its `content`
+    JSON.
+
+    However, block-level annotations may also be used to group line-level annotations, in which case
+    they have no textual content themselves, except for an optional label. Instead, their content
+    is serialized by joining TextualBody values from all associated line-level annotations.
+
+    Line-level annotations are associated with blocks via the `block` property, and that relationship
+    is serialized as `partOf` at the root of the line-level annotation."""
 
     #: annotation id (uuid, autogenerated when created)
     id = models.UUIDField(primary_key=True, default=uuid.uuid4, editable=False)
     #: date created
     created = models.DateTimeField(auto_now_add=True)
     #: date last modified
     modified = models.DateTimeField(auto_now=True)
-    #: json content of the annotation
+    #: json content of the annotation. in addition to W3C Web Annotation Data Model properties,
+    #: may also include: "schema:position", which tracks the order of the annotation with respect
+    #: to others in the same block or canvas; and "textGranularity", which indicates whether this
+    #: is a block- or line-level annotation
     content = models.JSONField()
     #: optional canonical identifier, for annotations imported from another source
     canonical = models.CharField(max_length=255, blank=True)
@@ -79,6 +95,14 @@ class Annotation(TrackChangesModel):
         on_delete=models.CASCADE,
         null=False,
     )
+    #: block-level annotation associated with this (line-level) annotation. if null, this is a
+    #: block-level annotation. if a block is deleted, all associated lines will be deleted.
+    block = models.ForeignKey(
+        to="Annotation",
+        on_delete=models.CASCADE,
+        related_name="lines",
+        null=True,
+    )
 
     # use custom manager & queryset
     objects = AnnotationQuerySet.as_manager()
@@ -140,6 +164,29 @@ def body_content(self):
         except IndexError:
             pass
 
+    @cached_property
+    def block_content_html(self):
+        """convenience method to get HTML content, including label and any associated lines,
+        of a block-level annotation, as a list of HTML strings"""
+        content = []
+        if self.label:
+            content.append(f"<h3>{self.label}</h3>")
+        if self.has_lines:
+            # if this block annotation has separate line annotations, serialize as ordered list
+            content.append("<ol>")
+            for l in self.lines.all().order_by("content__schema:position"):
+                content.append(f"<li>{l.body_content}</li>")
+            content.append("</ol>")
+        elif self.body_content:
+            content.append(self.body_content)
+        return content
+
+    @cached_property
+    def has_lines(self):
+        """cached property to indicate whether or not this is a block-level
+        annotation with line-level children"""
+        return self.lines.exists()
+
     def set_content(self, data):
         """Set or update annotation content and model fields.
 
@@ -148,7 +195,7 @@ def set_content(self, data):
         and the data will not be saved.
         """
         # remove any values tracked on the model; redundant in json field
-        for val in ["id", "created", "modified", "@context", "type"]:
+        for val in ["id", "created", "modified", "@context", "type", "etag"]:
             if val in data:
                 del data[val]
 
@@ -202,6 +249,18 @@ def sanitize_html(cls, html):
         else:
             return cleaned_html
 
+    @property
+    def etag(self):
+        """Compute and return an md5 hash of content to use as an ETag.
+
+        NOTE: Only :attr:`content` can be modified in the editor, so it is the only hashed
+        attribute. If other attributes become mutable, modify this function to include them in
+        the ETag computation."""
+        # must be a string encoded as utf-8 to compute md5 hash
+        content_str = json.dumps(self.content, sort_keys=True).encode("utf-8")
+        # ETag should be wrapped in double quotes, per Django @condition docs
+        return f'"{hashlib.md5(content_str).hexdigest()}"'
+
     def compile(self, include_context=True):
         """Combine annotation data and return as a dictionary that
         can be serialized as JSON.  Includes context by default,
@@ -213,6 +272,11 @@ def compile(self, include_context=True):
         anno = {}
         if include_context:
             anno = {"@context": "http://www.w3.org/ns/anno.jsonld"}
+        else:
+            # NOTE: ETag required here for inclusion in annotation list, which is how
+            # annotations are fetched during editing; need to associate each ETag with
+            # an individual annotation, for comparison with response ETag on POST/DELETE
+            anno = {"etag": self.etag}
 
         # define fields in desired order
         anno.update(
@@ -246,4 +310,9 @@ def compile(self, include_context=True):
         # overwrite with the base annotation data in case of any collisions
         # between content and model fields
         anno.update(base_anno)
+
+        # if this is a line-level annotation with block, include in content
+        if self.block:
+            anno.update({"partOf": self.block.uri()})
+
         return anno
diff --git a/geniza/annotations/tests/test_annotations_models.py b/geniza/annotations/tests/test_annotations_models.py
@@ -18,6 +18,27 @@ def test_get_absolute_url(self):
         anno = Annotation()
         assert anno.get_absolute_url() == "/annotations/%s/" % anno.pk
 
+    def test_etag(self, annotation):
+        old_etag = annotation.etag
+        # should be surrounded by doublequotes
+        assert old_etag[0] == old_etag[-1] == '"'
+        # should be length of an md5 hash + two characters
+        assert len(old_etag) == 34
+        # changing content should change etag
+        annotation.content.update(
+            {
+                "foo": "bar",
+                "id": "bogus",
+                "created": "yesterday",
+                "modified": "today",
+            }
+        )
+        assert annotation.etag != old_etag
+        new_etag = annotation.etag
+        # changing other properties on the annotation should not change etag
+        annotation.footnote = Footnote()
+        assert annotation.etag == new_etag
+
     @pytest.mark.django_db
     def test_uri(self):
         anno = Annotation()
@@ -31,12 +52,13 @@ def test_set_content(self, source, document):
             "id": absolutize_url("/annotations/1"),
             "type": "Annotation",
             "foo": "bar",
+            "etag": "abcd1234",
         }
         anno = Annotation(footnote=footnote)
         anno.set_content(content)
 
         # check that appropriate fields were removed
-        for field in ["@context", "id", "type"]:
+        for field in ["@context", "id", "type", "etag"]:
             assert field not in anno.content
         # remaining content was set
         assert anno.content["foo"] == "bar"
@@ -116,6 +138,18 @@ def test_compile(self, annotation):
         assert compiled["canonical"] == annotation.canonical
         assert compiled["via"] == annotation.via
 
+        line = Annotation.objects.create(
+            footnote=annotation.footnote, block=annotation, content={}
+        )
+        compiled = line.compile()
+        assert compiled["partOf"] == annotation.uri()
+
+        # when include_context=False (i.e. part of a list), should include etag, since
+        # we need a way to associate individual ETag to each item returned in list response
+        compiled = line.compile(include_context=False)
+        assert compiled["etag"] == line.etag
+        assert "@context" not in compiled
+
     def test_sanitize_html(self):
         html = '<table><div><p style="foo:bar;">test</p></div><ol><li>line</li></ol></table>'
         # should strip out all unwanted elements and attributes (table, div, style)
@@ -134,6 +168,40 @@ def test_sanitize_html(self):
         html = "<p>text\xa0and more \xa0 text</p>"
         assert Annotation.sanitize_html(html) == "<p>text and more text</p>"
 
+    def test_block_content_html(self, annotation):
+        annotation.content["body"][0]["label"] = "Test label"
+        # should include label and content
+        block_html = annotation.block_content_html
+        assert len(block_html) == 2
+        assert block_html[0] == "<h3>Test label</h3>"
+        assert block_html[1] == annotation.body_content
+
+        # with associated lines, should produce ordered list
+        del annotation.content["body"][0]["value"]
+        line_1 = Annotation.objects.create(
+            block=annotation,
+            content={"body": [{"value": "Line 1"}], "schema:position": 1},
+            footnote=annotation.footnote,
+        )
+        line_2 = Annotation.objects.create(
+            block=annotation,
+            content={"body": [{"value": "Line 2"}], "schema:position": 2},
+            footnote=annotation.footnote,
+        )
+
+        # invalidate cached properties
+        del annotation.has_lines
+        del annotation.block_content_html
+
+        # should now show that it has lines and produce the ordered list
+        assert annotation.has_lines == True
+        block_html = annotation.block_content_html
+        assert len(block_html) == 5
+        assert block_html[0] == "<h3>Test label</h3>"
+        assert block_html[1] == "<ol>"
+        assert block_html[2] == f"<li>{line_1.body_content}</li>"
+        assert block_html[3] == f"<li>{line_2.body_content}</li>"
+
 
 @pytest.mark.django_db
 class TestAnnotationQuerySet: