From ed106654ccb193907d1caa92e11435cb11b93f3b Mon Sep 17 00:00:00 2001 From: Tim Schilling Date: Tue, 19 Nov 2024 13:23:49 -0600 Subject: [PATCH] Updated bulk_create_with_history to support update_conflicts of bulk_create --- CHANGES.rst | 2 + docs/common_issues.rst | 12 +++- simple_history/manager.py | 9 ++- simple_history/tests/models.py | 8 +++ simple_history/tests/tests/test_manager.py | 10 +++ simple_history/tests/tests/test_utils.py | 57 +++++++++++++++- simple_history/utils.py | 75 ++++++++++++++++++++-- 7 files changed, 164 insertions(+), 9 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 9c44563c3..afae06131 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -7,6 +7,8 @@ Unreleased - Made ``skip_history_when_saving`` work when creating an object - not just when updating an object (gh-1262) - Improved performance of the ``latest_of_each()`` history manager method (gh-1360) +- Updated ``bulk_create_with_history`` to support ``update_conflicts`` and + ``update_fields`` of ``bulk_create()`` (gh-1323) 3.7.0 (2024-05-29) ------------------ diff --git a/docs/common_issues.rst b/docs/common_issues.rst index b1c9a396a..a647a3602 100644 --- a/docs/common_issues.rst +++ b/docs/common_issues.rst @@ -32,8 +32,18 @@ history: >>> Poll.history.count() 1000 +You can also use ``bulk_create_with_history`` with Django's ``update_conflicts`` where +you can specify the fields to update in case of a conflict. This incurs an additional +query to refetch the object to update the history because the full object is not +returned. If a record is updated, the historical record will have a `history_type` of +``'~'``, meaning updated. + +Note: if a record is updated, but there were no historical +records for that instance, the historical record will have a `history_type` of ``'+'``. + + If you want to specify a change reason or history user for each record in the bulk create, -you can add `_change_reason`, `_history_user` or `_history_date` on each instance: +you can add `_change_reason`, `_history_user`, `_history_date` on each instance: .. code-block:: pycon diff --git a/simple_history/manager.py b/simple_history/manager.py index 2d9bda656..1ab8a3dd6 100644 --- a/simple_history/manager.py +++ b/simple_history/manager.py @@ -1,3 +1,6 @@ +from contextlib import ContextDecorator +from typing import Optional + from django.conf import settings from django.db import models from django.db.models import Exists, OuterRef, Q, QuerySet @@ -225,9 +228,9 @@ def bulk_history_create( if not getattr(settings, "SIMPLE_HISTORY_ENABLED", True): return - history_type = "+" + default_history_type = "+" if update: - history_type = "~" + default_history_type = "~" historical_instances = [] for instance in objs: @@ -243,7 +246,7 @@ def bulk_history_create( history_user=history_user, history_change_reason=get_change_reason_from_object(instance) or default_change_reason, - history_type=history_type, + history_type=getattr(instance, "_history_type", default_history_type), **{ field.attname: getattr(instance, field.attname) for field in self.model.tracked_fields diff --git a/simple_history/tests/models.py b/simple_history/tests/models.py index f35b5cf6e..b647d1ca7 100644 --- a/simple_history/tests/models.py +++ b/simple_history/tests/models.py @@ -70,6 +70,14 @@ class PollWithExcludedFieldsWithDefaults(models.Model): ) +class PollWithUniqueQuestionAndWithPlace(models.Model): + question = models.CharField(max_length=200, unique=True) + pub_date = models.DateTimeField("date published", null=True) + place = models.TextField(null=True) + + history = HistoricalRecords() + + class PollWithExcludedFKField(models.Model): question = models.CharField(max_length=200) pub_date = models.DateTimeField("date published") diff --git a/simple_history/tests/tests/test_manager.py b/simple_history/tests/tests/test_manager.py index acb9e0256..1bfc175a8 100644 --- a/simple_history/tests/tests/test_manager.py +++ b/simple_history/tests/tests/test_manager.py @@ -427,6 +427,16 @@ def test_bulk_history_create_with_change_reason(self): ) ) + def test_bulk_history_create_with_history_type(self): + for poll in self.data: + poll._history_type = "~" + + Poll.history.bulk_history_create(self.data) + + self.assertTrue( + all([history.history_type == "~" for history in Poll.history.all()]) + ) + class PrefetchingMethodsTestCase(TestCase): def setUp(self): diff --git a/simple_history/tests/tests/test_utils.py b/simple_history/tests/tests/test_utils.py index 7db701d98..eb88abc41 100644 --- a/simple_history/tests/tests/test_utils.py +++ b/simple_history/tests/tests/test_utils.py @@ -6,7 +6,7 @@ import django from django.contrib.auth import get_user_model from django.db import IntegrityError, transaction -from django.test import TestCase, TransactionTestCase, override_settings +from django.test import TestCase, TransactionTestCase, override_settings, tag from django.utils import timezone from simple_history.exceptions import AlternativeManagerError, NotHistoricalModelError @@ -26,6 +26,7 @@ PollWithSelfManyToMany, PollWithSeveralManyToMany, PollWithUniqueQuestion, + PollWithUniqueQuestionAndWithPlace, Street, ) from simple_history.utils import ( @@ -257,6 +258,60 @@ def test_bulk_create_history_with_duplicates_ignore_conflicts(self): self.assertEqual(PollWithUniqueQuestion.objects.count(), 2) self.assertEqual(PollWithUniqueQuestion.history.count(), 2) + def test_bulk_create_history_with_duplicates_update_conflicts_create_only_field( + self, + ): + poll1 = PollWithUniqueQuestionAndWithPlace.objects.create( + question="Question 1", pub_date=timezone.now(), place="earth" + ) + PollWithUniqueQuestionAndWithPlace.objects.create( + question="Question 2", pub_date=timezone.now(), place="moon" + ) + duplicates = [ + # Reuse the same object that already exists + poll1, + # Match the unique field but with different values for other fields + PollWithUniqueQuestionAndWithPlace( + question="Question 2", pub_date=None, place="sun" + ), + PollWithUniqueQuestionAndWithPlace( + question="Question 3", pub_date=None, place="saturn" + ), + ] + + bulk_create_with_history( + duplicates, + PollWithUniqueQuestionAndWithPlace, + update_conflicts=True, + unique_fields=["question"], + update_fields=["pub_date"], + ) + new1, new2, new3 = list( + PollWithUniqueQuestionAndWithPlace.objects.order_by("question") + ) + # Confirm the first object was updated and has two historical records + self.assertEqual(new1.place, "earth") + self.assertIsNotNone(new1.pub_date) + new1_hist1, new1_hist2 = list(new1.history.all().order_by("history_id")) + self.assertEqual(new1_hist1.history_type, "+") + self.assertEqual(new1_hist2.history_type, "~") + self.assertIsNotNone(new1_hist2.pub_date) + self.assertIsNotNone(new1_hist2.place, "earth") + + # This shouldn't change since it wasn't in update_fields + new2_hist1, new2_hist2 = list(new2.history.all().order_by("history_id")) + self.assertEqual(new2_hist1.history_type, "+") + self.assertEqual(new2_hist2.history_type, "~") + self.assertIsNone(new2_hist2.pub_date) + self.assertIsNotNone(new2_hist2.place, "moon") + + # There should only be 1 addition record including saturn since place + # is inserted, but not updated. + new3_hist = new3.history.get() + self.assertEqual(new3_hist.history_type, "+") + self.assertIsNone(new2_hist2.pub_date) + self.assertIsNotNone(new2_hist2.place, "saturn") + def test_bulk_create_history_with_no_ids_return(self): pub_date = timezone.now() objects = [ diff --git a/simple_history/utils.py b/simple_history/utils.py index a5bafeafc..dc4e51588 100644 --- a/simple_history/utils.py +++ b/simple_history/utils.py @@ -1,5 +1,16 @@ +from itertools import chain + from django.db import transaction -from django.db.models import Case, ForeignKey, ManyToManyField, Q, When +from django.db.models import ( + Case, + Exists, + ForeignKey, + ManyToManyField, + OuterRef, + Q, + Value, + When, +) from django.forms.models import model_to_dict from simple_history.exceptions import AlternativeManagerError, NotHistoricalModelError @@ -86,6 +97,9 @@ def bulk_create_with_history( model, batch_size=None, ignore_conflicts=False, + update_conflicts=False, + update_fields=None, + unique_fields=None, default_user=None, default_change_reason=None, default_date=None, @@ -94,12 +108,24 @@ def bulk_create_with_history( """ Bulk create the objects specified by objs while also bulk creating their history (all in one transaction). + Because of not providing primary key attribute after bulk_create on any DB except Postgres (https://docs.djangoproject.com/en/2.2/ref/models/querysets/#bulk-create) - Divide this process on two transactions for other DB's + this process on will use two transactions for other DB's. + + If ``update_conflicts`` is used, the function will refetch records from the + database to ensure that the historical records match the exact state of the + model instances. + :param objs: List of objs (not yet saved to the db) of type model :param model: Model class that should be created :param batch_size: Number of objects that should be created in each batch + :param ignore_conflicts: Boolean passed through to Django ORM to ignore conflicts + :param update_conflicts: Boolean passed through to Django ORM to update on conflicts + :param update_fields: List of fields passed through to Django ORM to update on + conflict. + :param unique_fields: List of fields through to Django ORM to identify uniqueness + to update. :param default_user: Optional user to specify as the history_user in each historical record :param default_change_reason: Optional change reason to specify as the change_reason @@ -116,16 +142,57 @@ def bulk_create_with_history( field.name for field in model._meta.get_fields() if isinstance(field, ManyToManyField) + or ( + # If using either unique fields or update fields, then we can exclude + # anything not in those lists. + (unique_fields or update_fields) + and field.name not in set(chain(unique_fields or [], update_fields or [])) + ) ] + history_manager = get_history_manager_for_model(model) model_manager = model._default_manager second_transaction_required = True with transaction.atomic(savepoint=False): objs_with_id = model_manager.bulk_create( - objs, batch_size=batch_size, ignore_conflicts=ignore_conflicts + objs, + batch_size=batch_size, + ignore_conflicts=ignore_conflicts, + update_conflicts=update_conflicts, + update_fields=update_fields, + unique_fields=unique_fields, ) - if objs_with_id and objs_with_id[0].pk and not ignore_conflicts: + if ( + objs_with_id + and all(obj.pk for obj in objs_with_id) + and not ignore_conflicts + ): + if update_conflicts: + # If we're updating on conflict, it's possible that some rows + # were updated and some were inserted. We need to refetch + # the data to make sure the historical record matches exactly + # since update_fields could be a subset of the fields. We + # also need to annotate the history_type to indicate if the + # record was updated or inserted. + key_name = get_app_model_primary_key_name(model) + objs_with_id = ( + model_manager.filter(pk__in=[obj.pk for obj in objs_with_id]) + .annotate( + _history_type=Case( + When( + Exists( + history_manager.filter( + **{key_name: OuterRef(key_name)} + ) + ), + then=Value("~"), + ), + default=Value("+"), + ) + ) + .select_for_update() + ) second_transaction_required = False history_manager.bulk_history_create( objs_with_id,