diff --git a/geniza/corpus/models.py b/geniza/corpus/models.py index 8f72e5e91..828d5f64b 100644 --- a/geniza/corpus/models.py +++ b/geniza/corpus/models.py @@ -1302,8 +1302,53 @@ def merge_with(self, merge_docs, rationale, user=None): needs_review = [self.needs_review] if self.needs_review else [] for doc in merge_docs: + # handle document dates validation before making any changes; + # mismatch should result in exception (caught by DocumentMerge.form_valid) + if ( + ( + # both documents have standard dates, and they don't match + doc.doc_date_standard + and self.doc_date_standard + and self.doc_date_standard != doc.doc_date_standard + ) + or ( + # both documents have original dates, and they don't match + doc.doc_date_original + and self.doc_date_original + and self.doc_date_original != doc.doc_date_original + ) + or ( + # other document has original, this doc has standard, and they don't match + doc.doc_date_original + and self.doc_date_standard + and doc.standardize_date() != self.doc_date_standard + ) + or ( + # other document has standard, this doc has original, and they don't match + doc.doc_date_standard + and self.doc_date_original + and self.standardize_date() != doc.doc_date_standard + ) + ): + raise ValidationError( + "Merged documents must not contain conflicting dates; resolve before merge" + ) + # add any tags from merge document tags to primary doc self.tags.add(*doc.tags.names()) + + # if not in conflict (i.e. missing or exact duplicate), copy dates to result document + if doc.doc_date_standard: + self.doc_date_standard = doc.doc_date_standard + if doc.doc_date_original: + self.doc_date_original = doc.doc_date_original + self.doc_date_calendar = doc.doc_date_calendar + + # add inferred datings (conflicts or duplicates are post-merge + # data cleanup tasks) + for dating in doc.dating_set.all(): + self.dating_set.add(dating) + # initialize old pgpid list if previously unset if self.old_pgpids is None: self.old_pgpids = [] diff --git a/geniza/corpus/tests/test_corpus_models.py b/geniza/corpus/tests/test_corpus_models.py index e90b9f54a..e55e7bb9e 100644 --- a/geniza/corpus/tests/test_corpus_models.py +++ b/geniza/corpus/tests/test_corpus_models.py @@ -25,6 +25,7 @@ from geniza.corpus.dates import Calendar from geniza.corpus.models import ( Collection, + Dating, Document, DocumentType, Fragment, @@ -1653,6 +1654,105 @@ def test_document_merge_with_log_entries(document, join): assert " [PGPID %s]" % join_pk in moved_log.change_message +def test_document_merge_with_dates(document, join): + editor = User.objects.get_or_create(username="editor")[0] + + # clone join for additional merges + join_clones = [] + for _ in range(4): + join_clone = Document.objects.get(pk=join.pk) + join_clone.pk = None + join_clone.save() + join_clones.append(join_clone) + + # create some datings; doesn't matter that they are identical, as cleaning + # up post-merge dupes is a manual data cleanup task. unit test will make + # sure that doesn't cause errors! + dating_1 = Dating.objects.create( + document=document, + display_date="1000 CE", + standard_date="1000", + rationale=Dating.PALEOGRAPHY, + notes="a note", + ) + dating_2 = Dating.objects.create( + document=join_clone, + display_date="1000 CE", + standard_date="1000", + rationale=Dating.PALEOGRAPHY, + notes="a note", + ) + + # should raise ValidationError on conflicting dates + document.doc_date_standard = "1230-01-01" + join.doc_date_standard = "1234-01-01" + with pytest.raises(ValidationError): + document.merge_with([join], "test", editor) + + # should use any existing dates if one of the merged documents has one + join.doc_date_standard = "" + document.merge_with([join], "test", editor) + assert document.doc_date_standard == "1230-01-01" + + document.doc_date_standard = "" + document.doc_date_original = "" + document.doc_date_calendar = "" + join_clones[0].doc_date_original = "15 Tevet 4990" + join_clones[0].doc_date_calendar = Calendar.ANNO_MUNDI + document.merge_with([join_clones[0]], "test", editor) + assert document.doc_date_original == "15 Tevet 4990" + assert document.doc_date_calendar == Calendar.ANNO_MUNDI + + # should raise error if one document's standard date conflicts with other document's + # original date + document.doc_date_original = "" + document.doc_date_standard = "1230-01-01" + join_clones[1].doc_date_original = "1 Tevet 5000" + join_clones[1].doc_date_calendar = Calendar.ANNO_MUNDI + with pytest.raises(ValidationError): + document.merge_with([join_clones[1]], "test", editor) + + document.doc_date_standard = "" + document.doc_date_original = "1 Tevet 5000" + document.doc_date_calendar = Calendar.ANNO_MUNDI + join_clones[1].doc_date_original = "" + join_clones[1].doc_date_standard = "1230-01-01" + with pytest.raises(ValidationError): + document.merge_with([join_clones[1]], "test", editor) + + # should not raise error on identical dates + document.doc_date_standard = "1230-01-01" + document.doc_date_original = "15 Tevet 4990" + document.doc_date_calendar = Calendar.ANNO_MUNDI + join_clones[1].doc_date_standard = "1230-01-01" + join_clones[1].doc_date_original = "15 Tevet 4990" + join_clones[1].doc_date_calendar = Calendar.ANNO_MUNDI + document.merge_with([join_clones[1]], "test", editor) + + # should consider identical if one doc's standardized original date = other doc's standard date + document.doc_date_standard = "1230-01-01" + document.doc_date_original = "" + document.doc_date_calendar = "" + join_clones[2].doc_date_standard = "" + join_clones[2].doc_date_original = "15 Tevet 4990" + join_clones[2].doc_date_calendar = Calendar.ANNO_MUNDI + document.merge_with([join_clones[2]], "test", editor) + assert document.doc_date_original == "15 Tevet 4990" + assert document.doc_date_calendar == Calendar.ANNO_MUNDI + + document.doc_date_standard = "" + join_clones[3].doc_date_standard = "1230-01-01" + join_clones[3].doc_date_original = "" + join_clones[3].doc_date_calendar = "" + document.merge_with([join_clones[3]], "test", editor) + assert document.doc_date_standard == "1230-01-01" + + # should carry over all inferred datings without error, even if they are identical + assert document.dating_set.count() == 2 + result_pks = [dating.pk for dating in document.dating_set.all()] + assert dating_1.pk in result_pks and dating_2.pk in result_pks + + def test_document_get_by_any_pgpid(document): # get by current pk assert Document.get_by_any_pgpid(document.pk) == document diff --git a/geniza/corpus/tests/test_corpus_views.py b/geniza/corpus/tests/test_corpus_views.py index c3b89afef..a37684306 100644 --- a/geniza/corpus/tests/test_corpus_views.py +++ b/geniza/corpus/tests/test_corpus_views.py @@ -1,4 +1,3 @@ -import re from datetime import datetime from time import sleep from unittest.mock import ANY, MagicMock, Mock, patch @@ -8,6 +7,7 @@ from django.contrib.admin.models import ADDITION, LogEntry from django.contrib.auth.models import User from django.contrib.contenttypes.models import ContentType +from django.core.exceptions import ValidationError from django.test import TestCase from django.urls import resolve, reverse from django.utils.text import Truncator, slugify @@ -1484,8 +1484,9 @@ def test_document_merge(self, admin_client, client): assert response.status_code == 200 # POST should merge + merge_url = "%s?ids=%s" % (reverse("admin:document-merge"), idstring) response = admin_client.post( - "%s?ids=%s" % (reverse("admin:document-merge"), idstring), + merge_url, {"primary_document": doc1.id, "rationale": "duplicate"}, follow=True, ) @@ -1500,7 +1501,7 @@ def test_document_merge(self, admin_client, client): with patch.object(Document, "merge_with") as mock_merge_with: # should pick up rationale notes as parenthetical response = admin_client.post( - "%s?ids=%s" % (reverse("admin:document-merge"), idstring), + merge_url, { "primary_document": doc1.id, "rationale": "duplicate", @@ -1512,7 +1513,7 @@ def test_document_merge(self, admin_client, client): # with "other", should use rationale notes as rationale string response = admin_client.post( - "%s?ids=%s" % (reverse("admin:document-merge"), idstring), + merge_url, { "primary_document": doc1.id, "rationale": "other", @@ -1522,6 +1523,20 @@ def test_document_merge(self, admin_client, client): ) mock_merge_with.assert_called_with(ANY, "test", user=ANY) + # should catch ValidationError and send back to form with error msg + mock_merge_with.side_effect = ValidationError("test message") + response = admin_client.post( + merge_url, + { + "primary_document": doc1.id, + "rationale": "duplicate", + }, + follow=True, + ) + TestCase().assertRedirects(response, merge_url) + messages = [str(msg) for msg in list(response.context["messages"])] + assert "test message" in messages + class TestTagMergeView: # adapted from TestDocumentMergeView diff --git a/geniza/corpus/views.py b/geniza/corpus/views.py index 97ae60a8e..db629eba0 100644 --- a/geniza/corpus/views.py +++ b/geniza/corpus/views.py @@ -8,6 +8,7 @@ from django.contrib.admin.models import CHANGE, LogEntry from django.contrib.auth.mixins import PermissionRequiredMixin from django.contrib.contenttypes.models import ContentType +from django.core.exceptions import ValidationError from django.db.models import Q from django.db.models.query import Prefetch from django.http import Http404, HttpResponse, JsonResponse @@ -691,7 +692,17 @@ def form_valid(self, form): # Merge secondary documents into the selected primary document user = getattr(self.request, "user", None) - primary_doc.merge_with(secondary_docs, rationale, user=user) + + try: + primary_doc.merge_with(secondary_docs, rationale, user=user) + except ValidationError as err: + # in case the merge resulted in an error, display error to user + messages.error(self.request, err.message) + # redirect to this form page instead of one of the documents + return HttpResponseRedirect( + "%s?ids=%s" + % (reverse("admin:document-merge"), self.request.GET.get("ids", "")), + ) # Display info about the merge to the user new_doc_link = reverse("admin:corpus_document_change", args=[primary_doc.id])