Merge pull request #1448 from Princeton-CDH/1442-no-language-source

Add "unspecified" source language option (#1442)
Princeton-CDH · Sep 11, 2023 · f4dc0f3
2 parents 0a9299e + 2788c84
commit f4dc0f3
Show file tree

Hide file tree

Showing 5 changed files with 58 additions and 9 deletions.
diff --git a/geniza/footnotes/metadata_export.py b/geniza/footnotes/metadata_export.py
@@ -58,7 +58,9 @@ def get_export_data_dict(self, source):
             "edition": source.edition,
             "other_info": source.other_info,
             "page_range": source.page_range,
-            "languages": {lang.name for lang in source.languages.all()},
+            "languages": {
+                lang.name for lang in source.languages.all() if lang.code != "zxx"
+            },
             "url": source.url,
             "notes": source.notes,
             # count via annotated queryset

diff --git a/geniza/footnotes/migrations/0031_unspecified_source_language.py b/geniza/footnotes/migrations/0031_unspecified_source_language.py
@@ -0,0 +1,31 @@
+# Generated by Django 3.2.16 on 2023-08-22 19:13
+
+from django.db import migrations, models
+
+
+def create_unspecified_source_language(apps, schema_editor):
+    SourceLanguage = apps.get_model("footnotes", "SourceLanguage")
+    SourceLanguage.objects.get_or_create(
+        name="Unspecified (unpublished transcriptions)", code="zxx"
+    )
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ("footnotes", "0030_digital_footnote_location"),
+    ]
+
+    operations = [
+        migrations.RunPython(
+            create_unspecified_source_language,
+            reverse_code=migrations.RunPython.noop,
+        ),
+        migrations.AlterField(
+            model_name="source",
+            name="languages",
+            field=models.ManyToManyField(
+                help_text="The language(s) the source is written in. Note: The Unspecified language\n        option should only ever be used for unpublished transcriptions, as the language of the\n        transcription is already marked on the document.",
+                to="footnotes.SourceLanguage",
+            ),
+        ),
+    ]
diff --git a/geniza/footnotes/models.py b/geniza/footnotes/models.py
@@ -167,8 +167,9 @@ class Source(models.Model):
     )
     languages = models.ManyToManyField(
         SourceLanguage,
-        help_text="""The language(s) the source is written in. Note: Sources should never include
-        transcription language unless the entire source consists of a transcription.""",
+        help_text="""The language(s) the source is written in. Note: The Unspecified language
+        option should only ever be used for unpublished transcriptions, as the language of the
+        transcription is already marked on the document.""",
     )
     url = models.URLField(blank=True, max_length=300, verbose_name="URL")
     # preliminary place to store transcription text; should not be editable
@@ -267,11 +268,12 @@ def formatted_display(self, extra_fields=True):
                 parts.append(edition_str)
 
         # Add non-English languages as parenthetical
-        non_english_langs = 0
+        included_langs = 0
         if self.languages.exists():
             for lang in self.languages.all():
-                if "English" not in str(lang):
-                    non_english_langs += 1
+                # Also prevent Unspecified from showing up in source citations
+                if "English" not in str(lang) and "Unspecified" not in str(lang):
+                    included_langs += 1
                     parts.append("(in %s)" % lang)
 
         # Handling presence of book/journal title
@@ -282,7 +284,7 @@ def formatted_display(self, extra_fields=True):
             #   NOT "Title" (in Hebrew) --> "Title," (in Hebrew)
             if self.title and (
                 self.source_type.type in doublequoted_types
-                and not non_english_langs  # put comma after language even when doublequotes present
+                and not included_langs  # put comma after language even when doublequotes present
             ):
                 # find rightmost doublequote
                 formatted_title = parts[-1]
@@ -386,7 +388,7 @@ def formatted_display(self, extra_fields=True):
         use_comma = (
             extra_fields
             or self.title
-            or (self.journal and not non_english_langs)
+            or (self.journal and not included_langs)
             or self.source_type.type == "Unpublished"
         )
         delimiter = ", " if use_comma else " "

diff --git a/geniza/footnotes/tests/test_footnote_metadata_export.py b/geniza/footnotes/tests/test_footnote_metadata_export.py
@@ -6,7 +6,7 @@
     FootnoteExporter,
     SourceExporter,
 )
-from geniza.footnotes.models import Footnote
+from geniza.footnotes.models import Footnote, SourceLanguage
 
 
 @pytest.mark.django_db
@@ -38,6 +38,13 @@ def test_source_export_data(source):
     assert data["num_footnotes"] == 0
     assert "url_admin" not in data
 
+    # should not include Unspecified language
+    source.languages.clear()
+    source.languages.add(SourceLanguage.objects.get(code="zxx"))
+    source_obj = src_exporter.get_queryset().get(pk=source.id)
+    data = src_exporter.get_export_data_dict(source_obj)
+    assert not data["languages"]
+
 
 @pytest.mark.django_db
 def test_admin_source_export_data(source):

diff --git a/geniza/footnotes/tests/test_footnote_models.py b/geniza/footnotes/tests/test_footnote_models.py
@@ -96,6 +96,13 @@ def test_str_article(self, article):
         )
 
     def test_str_language(self, article):
+        # English should not show up in citation
+        article.languages.add(SourceLanguage.objects.get(code="en"))
+        assert "English" not in str(article)
+        # unspecified language should not show up in citation
+        article.languages.add(SourceLanguage.objects.get(code="zxx"))
+        assert "Unspecified" not in str(article)
+        # other non-English languages should show up in citation
         article.languages.add(SourceLanguage.objects.get(name="Hebrew"))
         assert "(in Hebrew)" in str(article)