Skip to content

Commit

Permalink
Merge pull request #1444 from Princeton-CDH/feature/1429-dates-merge
Browse files Browse the repository at this point in the history
Resolve document and inferred dates in document merge (#1429)
  • Loading branch information
blms authored Sep 11, 2023
2 parents b9d3aa6 + 905329d commit 3b0685c
Show file tree
Hide file tree
Showing 4 changed files with 176 additions and 5 deletions.
45 changes: 45 additions & 0 deletions geniza/corpus/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1302,8 +1302,53 @@ def merge_with(self, merge_docs, rationale, user=None):
needs_review = [self.needs_review] if self.needs_review else []

for doc in merge_docs:
# handle document dates validation before making any changes;
# mismatch should result in exception (caught by DocumentMerge.form_valid)
if (
(
# both documents have standard dates, and they don't match
doc.doc_date_standard
and self.doc_date_standard
and self.doc_date_standard != doc.doc_date_standard
)
or (
# both documents have original dates, and they don't match
doc.doc_date_original
and self.doc_date_original
and self.doc_date_original != doc.doc_date_original
)
or (
# other document has original, this doc has standard, and they don't match
doc.doc_date_original
and self.doc_date_standard
and doc.standardize_date() != self.doc_date_standard
)
or (
# other document has standard, this doc has original, and they don't match
doc.doc_date_standard
and self.doc_date_original
and self.standardize_date() != doc.doc_date_standard
)
):
raise ValidationError(
"Merged documents must not contain conflicting dates; resolve before merge"
)

# add any tags from merge document tags to primary doc
self.tags.add(*doc.tags.names())

# if not in conflict (i.e. missing or exact duplicate), copy dates to result document
if doc.doc_date_standard:
self.doc_date_standard = doc.doc_date_standard
if doc.doc_date_original:
self.doc_date_original = doc.doc_date_original
self.doc_date_calendar = doc.doc_date_calendar

# add inferred datings (conflicts or duplicates are post-merge
# data cleanup tasks)
for dating in doc.dating_set.all():
self.dating_set.add(dating)

# initialize old pgpid list if previously unset
if self.old_pgpids is None:
self.old_pgpids = []
Expand Down
100 changes: 100 additions & 0 deletions geniza/corpus/tests/test_corpus_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from geniza.corpus.dates import Calendar
from geniza.corpus.models import (
Collection,
Dating,
Document,
DocumentType,
Fragment,
Expand Down Expand Up @@ -1653,6 +1654,105 @@ def test_document_merge_with_log_entries(document, join):
assert " [PGPID %s]" % join_pk in moved_log.change_message


def test_document_merge_with_dates(document, join):
editor = User.objects.get_or_create(username="editor")[0]

# clone join for additional merges
join_clones = []
for _ in range(4):
join_clone = Document.objects.get(pk=join.pk)
join_clone.pk = None
join_clone.save()
join_clones.append(join_clone)

# create some datings; doesn't matter that they are identical, as cleaning
# up post-merge dupes is a manual data cleanup task. unit test will make
# sure that doesn't cause errors!
dating_1 = Dating.objects.create(
document=document,
display_date="1000 CE",
standard_date="1000",
rationale=Dating.PALEOGRAPHY,
notes="a note",
)
dating_2 = Dating.objects.create(
document=join_clone,
display_date="1000 CE",
standard_date="1000",
rationale=Dating.PALEOGRAPHY,
notes="a note",
)

# should raise ValidationError on conflicting dates
document.doc_date_standard = "1230-01-01"
join.doc_date_standard = "1234-01-01"
with pytest.raises(ValidationError):
document.merge_with([join], "test", editor)

# should use any existing dates if one of the merged documents has one
join.doc_date_standard = ""
document.merge_with([join], "test", editor)
assert document.doc_date_standard == "1230-01-01"

document.doc_date_standard = ""
document.doc_date_original = ""
document.doc_date_calendar = ""
join_clones[0].doc_date_original = "15 Tevet 4990"
join_clones[0].doc_date_calendar = Calendar.ANNO_MUNDI
document.merge_with([join_clones[0]], "test", editor)
assert document.doc_date_original == "15 Tevet 4990"
assert document.doc_date_calendar == Calendar.ANNO_MUNDI

# should raise error if one document's standard date conflicts with other document's
# original date
document.doc_date_original = ""
document.doc_date_standard = "1230-01-01"
join_clones[1].doc_date_original = "1 Tevet 5000"
join_clones[1].doc_date_calendar = Calendar.ANNO_MUNDI
with pytest.raises(ValidationError):
document.merge_with([join_clones[1]], "test", editor)

document.doc_date_standard = ""
document.doc_date_original = "1 Tevet 5000"
document.doc_date_calendar = Calendar.ANNO_MUNDI
join_clones[1].doc_date_original = ""
join_clones[1].doc_date_standard = "1230-01-01"
with pytest.raises(ValidationError):
document.merge_with([join_clones[1]], "test", editor)

# should not raise error on identical dates
document.doc_date_standard = "1230-01-01"
document.doc_date_original = "15 Tevet 4990"
document.doc_date_calendar = Calendar.ANNO_MUNDI
join_clones[1].doc_date_standard = "1230-01-01"
join_clones[1].doc_date_original = "15 Tevet 4990"
join_clones[1].doc_date_calendar = Calendar.ANNO_MUNDI
document.merge_with([join_clones[1]], "test", editor)

# should consider identical if one doc's standardized original date = other doc's standard date
document.doc_date_standard = "1230-01-01"
document.doc_date_original = ""
document.doc_date_calendar = ""
join_clones[2].doc_date_standard = ""
join_clones[2].doc_date_original = "15 Tevet 4990"
join_clones[2].doc_date_calendar = Calendar.ANNO_MUNDI
document.merge_with([join_clones[2]], "test", editor)
assert document.doc_date_original == "15 Tevet 4990"
assert document.doc_date_calendar == Calendar.ANNO_MUNDI

document.doc_date_standard = ""
join_clones[3].doc_date_standard = "1230-01-01"
join_clones[3].doc_date_original = ""
join_clones[3].doc_date_calendar = ""
document.merge_with([join_clones[3]], "test", editor)
assert document.doc_date_standard == "1230-01-01"

# should carry over all inferred datings without error, even if they are identical
assert document.dating_set.count() == 2
result_pks = [dating.pk for dating in document.dating_set.all()]
assert dating_1.pk in result_pks and dating_2.pk in result_pks


def test_document_get_by_any_pgpid(document):
# get by current pk
assert Document.get_by_any_pgpid(document.pk) == document
Expand Down
23 changes: 19 additions & 4 deletions geniza/corpus/tests/test_corpus_views.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import re
from datetime import datetime
from time import sleep
from unittest.mock import ANY, MagicMock, Mock, patch
Expand All @@ -8,6 +7,7 @@
from django.contrib.admin.models import ADDITION, LogEntry
from django.contrib.auth.models import User
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ValidationError
from django.test import TestCase
from django.urls import resolve, reverse
from django.utils.text import Truncator, slugify
Expand Down Expand Up @@ -1484,8 +1484,9 @@ def test_document_merge(self, admin_client, client):
assert response.status_code == 200

# POST should merge
merge_url = "%s?ids=%s" % (reverse("admin:document-merge"), idstring)
response = admin_client.post(
"%s?ids=%s" % (reverse("admin:document-merge"), idstring),
merge_url,
{"primary_document": doc1.id, "rationale": "duplicate"},
follow=True,
)
Expand All @@ -1500,7 +1501,7 @@ def test_document_merge(self, admin_client, client):
with patch.object(Document, "merge_with") as mock_merge_with:
# should pick up rationale notes as parenthetical
response = admin_client.post(
"%s?ids=%s" % (reverse("admin:document-merge"), idstring),
merge_url,
{
"primary_document": doc1.id,
"rationale": "duplicate",
Expand All @@ -1512,7 +1513,7 @@ def test_document_merge(self, admin_client, client):

# with "other", should use rationale notes as rationale string
response = admin_client.post(
"%s?ids=%s" % (reverse("admin:document-merge"), idstring),
merge_url,
{
"primary_document": doc1.id,
"rationale": "other",
Expand All @@ -1522,6 +1523,20 @@ def test_document_merge(self, admin_client, client):
)
mock_merge_with.assert_called_with(ANY, "test", user=ANY)

# should catch ValidationError and send back to form with error msg
mock_merge_with.side_effect = ValidationError("test message")
response = admin_client.post(
merge_url,
{
"primary_document": doc1.id,
"rationale": "duplicate",
},
follow=True,
)
TestCase().assertRedirects(response, merge_url)
messages = [str(msg) for msg in list(response.context["messages"])]
assert "test message" in messages


class TestTagMergeView:
# adapted from TestDocumentMergeView
Expand Down
13 changes: 12 additions & 1 deletion geniza/corpus/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from django.contrib.admin.models import CHANGE, LogEntry
from django.contrib.auth.mixins import PermissionRequiredMixin
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ValidationError
from django.db.models import Q
from django.db.models.query import Prefetch
from django.http import Http404, HttpResponse, JsonResponse
Expand Down Expand Up @@ -691,7 +692,17 @@ def form_valid(self, form):

# Merge secondary documents into the selected primary document
user = getattr(self.request, "user", None)
primary_doc.merge_with(secondary_docs, rationale, user=user)

try:
primary_doc.merge_with(secondary_docs, rationale, user=user)
except ValidationError as err:
# in case the merge resulted in an error, display error to user
messages.error(self.request, err.message)
# redirect to this form page instead of one of the documents
return HttpResponseRedirect(
"%s?ids=%s"
% (reverse("admin:document-merge"), self.request.GET.get("ids", "")),
)

# Display info about the merge to the user
new_doc_link = reverse("admin:corpus_document_change", args=[primary_doc.id])
Expand Down

0 comments on commit 3b0685c

Please sign in to comment.