Skip to content

Commit

Permalink
Updated bulk_create_with_history to support update_conflicts of bulk_…
Browse files Browse the repository at this point in the history
…create
  • Loading branch information
tim-schilling committed Nov 19, 2024
1 parent 7fdebc8 commit ed10665
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Unreleased
- Made ``skip_history_when_saving`` work when creating an object - not just when
updating an object (gh-1262)
- Improved performance of the ``latest_of_each()`` history manager method (gh-1360)
- Updated ``bulk_create_with_history`` to support ``update_conflicts`` and
``update_fields`` of ``bulk_create()`` (gh-1323)

3.7.0 (2024-05-29)
------------------
Expand Down
12 changes: 11 additions & 1 deletion docs/common_issues.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,18 @@ history:
>>> Poll.history.count()
1000
You can also use ``bulk_create_with_history`` with Django's ``update_conflicts`` where
you can specify the fields to update in case of a conflict. This incurs an additional
query to refetch the object to update the history because the full object is not
returned. If a record is updated, the historical record will have a `history_type` of
``'~'``, meaning updated.

Note: if a record is updated, but there were no historical
records for that instance, the historical record will have a `history_type` of ``'+'``.


If you want to specify a change reason or history user for each record in the bulk create,
you can add `_change_reason`, `_history_user` or `_history_date` on each instance:
you can add `_change_reason`, `_history_user`, `_history_date` on each instance:

.. code-block:: pycon
Expand Down
9 changes: 6 additions & 3 deletions simple_history/manager.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
from contextlib import ContextDecorator
from typing import Optional

from django.conf import settings
from django.db import models
from django.db.models import Exists, OuterRef, Q, QuerySet
Expand Down Expand Up @@ -225,9 +228,9 @@ def bulk_history_create(
if not getattr(settings, "SIMPLE_HISTORY_ENABLED", True):
return

history_type = "+"
default_history_type = "+"
if update:
history_type = "~"
default_history_type = "~"

historical_instances = []
for instance in objs:
Expand All @@ -243,7 +246,7 @@ def bulk_history_create(
history_user=history_user,
history_change_reason=get_change_reason_from_object(instance)
or default_change_reason,
history_type=history_type,
history_type=getattr(instance, "_history_type", default_history_type),
**{
field.attname: getattr(instance, field.attname)
for field in self.model.tracked_fields
Expand Down
8 changes: 8 additions & 0 deletions simple_history/tests/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@ class PollWithExcludedFieldsWithDefaults(models.Model):
)


class PollWithUniqueQuestionAndWithPlace(models.Model):
question = models.CharField(max_length=200, unique=True)
pub_date = models.DateTimeField("date published", null=True)
place = models.TextField(null=True)

history = HistoricalRecords()


class PollWithExcludedFKField(models.Model):
question = models.CharField(max_length=200)
pub_date = models.DateTimeField("date published")
Expand Down
10 changes: 10 additions & 0 deletions simple_history/tests/tests/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,16 @@ def test_bulk_history_create_with_change_reason(self):
)
)

def test_bulk_history_create_with_history_type(self):
for poll in self.data:
poll._history_type = "~"

Poll.history.bulk_history_create(self.data)

self.assertTrue(
all([history.history_type == "~" for history in Poll.history.all()])
)


class PrefetchingMethodsTestCase(TestCase):
def setUp(self):
Expand Down
57 changes: 56 additions & 1 deletion simple_history/tests/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import django
from django.contrib.auth import get_user_model
from django.db import IntegrityError, transaction
from django.test import TestCase, TransactionTestCase, override_settings
from django.test import TestCase, TransactionTestCase, override_settings, tag
from django.utils import timezone

from simple_history.exceptions import AlternativeManagerError, NotHistoricalModelError
Expand All @@ -26,6 +26,7 @@
PollWithSelfManyToMany,
PollWithSeveralManyToMany,
PollWithUniqueQuestion,
PollWithUniqueQuestionAndWithPlace,
Street,
)
from simple_history.utils import (
Expand Down Expand Up @@ -257,6 +258,60 @@ def test_bulk_create_history_with_duplicates_ignore_conflicts(self):
self.assertEqual(PollWithUniqueQuestion.objects.count(), 2)
self.assertEqual(PollWithUniqueQuestion.history.count(), 2)

def test_bulk_create_history_with_duplicates_update_conflicts_create_only_field(
self,
):
poll1 = PollWithUniqueQuestionAndWithPlace.objects.create(
question="Question 1", pub_date=timezone.now(), place="earth"
)
PollWithUniqueQuestionAndWithPlace.objects.create(
question="Question 2", pub_date=timezone.now(), place="moon"
)
duplicates = [
# Reuse the same object that already exists
poll1,
# Match the unique field but with different values for other fields
PollWithUniqueQuestionAndWithPlace(
question="Question 2", pub_date=None, place="sun"
),
PollWithUniqueQuestionAndWithPlace(
question="Question 3", pub_date=None, place="saturn"
),
]

bulk_create_with_history(
duplicates,
PollWithUniqueQuestionAndWithPlace,
update_conflicts=True,
unique_fields=["question"],
update_fields=["pub_date"],
)
new1, new2, new3 = list(
PollWithUniqueQuestionAndWithPlace.objects.order_by("question")
)
# Confirm the first object was updated and has two historical records
self.assertEqual(new1.place, "earth")
self.assertIsNotNone(new1.pub_date)
new1_hist1, new1_hist2 = list(new1.history.all().order_by("history_id"))
self.assertEqual(new1_hist1.history_type, "+")
self.assertEqual(new1_hist2.history_type, "~")
self.assertIsNotNone(new1_hist2.pub_date)
self.assertIsNotNone(new1_hist2.place, "earth")

# This shouldn't change since it wasn't in update_fields
new2_hist1, new2_hist2 = list(new2.history.all().order_by("history_id"))
self.assertEqual(new2_hist1.history_type, "+")
self.assertEqual(new2_hist2.history_type, "~")
self.assertIsNone(new2_hist2.pub_date)
self.assertIsNotNone(new2_hist2.place, "moon")

# There should only be 1 addition record including saturn since place
# is inserted, but not updated.
new3_hist = new3.history.get()
self.assertEqual(new3_hist.history_type, "+")
self.assertIsNone(new2_hist2.pub_date)
self.assertIsNotNone(new2_hist2.place, "saturn")

def test_bulk_create_history_with_no_ids_return(self):
pub_date = timezone.now()
objects = [
Expand Down
75 changes: 71 additions & 4 deletions simple_history/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
from itertools import chain

from django.db import transaction
from django.db.models import Case, ForeignKey, ManyToManyField, Q, When
from django.db.models import (
Case,
Exists,
ForeignKey,
ManyToManyField,
OuterRef,
Q,
Value,
When,
)
from django.forms.models import model_to_dict

from simple_history.exceptions import AlternativeManagerError, NotHistoricalModelError
Expand Down Expand Up @@ -86,6 +97,9 @@ def bulk_create_with_history(
model,
batch_size=None,
ignore_conflicts=False,
update_conflicts=False,
update_fields=None,
unique_fields=None,
default_user=None,
default_change_reason=None,
default_date=None,
Expand All @@ -94,12 +108,24 @@ def bulk_create_with_history(
"""
Bulk create the objects specified by objs while also bulk creating
their history (all in one transaction).
Because of not providing primary key attribute after bulk_create on any DB except
Postgres (https://docs.djangoproject.com/en/2.2/ref/models/querysets/#bulk-create)
Divide this process on two transactions for other DB's
this process on will use two transactions for other DB's.
If ``update_conflicts`` is used, the function will refetch records from the
database to ensure that the historical records match the exact state of the
model instances.
:param objs: List of objs (not yet saved to the db) of type model
:param model: Model class that should be created
:param batch_size: Number of objects that should be created in each batch
:param ignore_conflicts: Boolean passed through to Django ORM to ignore conflicts
:param update_conflicts: Boolean passed through to Django ORM to update on conflicts
:param update_fields: List of fields passed through to Django ORM to update on
conflict.
:param unique_fields: List of fields through to Django ORM to identify uniqueness
to update.
:param default_user: Optional user to specify as the history_user in each historical
record
:param default_change_reason: Optional change reason to specify as the change_reason
Expand All @@ -116,16 +142,57 @@ def bulk_create_with_history(
field.name
for field in model._meta.get_fields()
if isinstance(field, ManyToManyField)
or (
# If using either unique fields or update fields, then we can exclude
# anything not in those lists.
(unique_fields or update_fields)
and field.name not in set(chain(unique_fields or [], update_fields or []))
)
]

history_manager = get_history_manager_for_model(model)
model_manager = model._default_manager

second_transaction_required = True
with transaction.atomic(savepoint=False):
objs_with_id = model_manager.bulk_create(
objs, batch_size=batch_size, ignore_conflicts=ignore_conflicts
objs,
batch_size=batch_size,
ignore_conflicts=ignore_conflicts,
update_conflicts=update_conflicts,
update_fields=update_fields,
unique_fields=unique_fields,
)
if objs_with_id and objs_with_id[0].pk and not ignore_conflicts:
if (
objs_with_id
and all(obj.pk for obj in objs_with_id)
and not ignore_conflicts
):
if update_conflicts:
# If we're updating on conflict, it's possible that some rows
# were updated and some were inserted. We need to refetch
# the data to make sure the historical record matches exactly
# since update_fields could be a subset of the fields. We
# also need to annotate the history_type to indicate if the
# record was updated or inserted.
key_name = get_app_model_primary_key_name(model)
objs_with_id = (
model_manager.filter(pk__in=[obj.pk for obj in objs_with_id])
.annotate(
_history_type=Case(
When(
Exists(
history_manager.filter(
**{key_name: OuterRef(key_name)}
)
),
then=Value("~"),
),
default=Value("+"),
)
)
.select_for_update()
)
second_transaction_required = False
history_manager.bulk_history_create(
objs_with_id,
Expand Down

0 comments on commit ed10665

Please sign in to comment.