Skip to content

Commit

Permalink
BookDataModel: add dry_run argument to merge_into
Browse files Browse the repository at this point in the history
  • Loading branch information
BartSchuurmans committed Mar 20, 2024
1 parent 8003acb commit 804e946
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 14 deletions.
23 changes: 17 additions & 6 deletions bookwyrm/management/commands/deduplicate_book_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from bookwyrm import models


def dedupe_model(model):
def dedupe_model(model, dry_run=False):
"""combine duplicate editions and update related models"""
print(f"deduplicating {model.__name__}:")
fields = model._meta.get_fields()
Expand All @@ -27,10 +27,13 @@ def dedupe_model(model):
print("----------")
objs = model.objects.filter(**{field.name: value}).order_by("id")
canonical = objs.first()
print(f"merging into {canonical.remote_id} based on {field.name} {value}:")
action = "would merge" if dry_run else "merging"
print(
f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:"
)
for obj in objs[1:]:
print(f"- {obj.remote_id}")
absorbed_fields = obj.merge_into(canonical)
absorbed_fields = obj.merge_into(canonical, dry_run=dry_run)
print(f" absorbed fields: {absorbed_fields}")


Expand All @@ -39,9 +42,17 @@ class Command(BaseCommand):

help = "merges duplicate book data"

def add_arguments(self, parser):
"""add the arguments for this command

Registers one optional flag, ``--dry_run``. It uses ``store_true``, so the
parsed value is True only when the flag is given on the command line.
Assumes ``parser`` is an argparse-style parser (TODO confirm against the
command base class).
"""
parser.add_argument(
"--dry_run",
action="store_true",
help="don't actually merge, only print what would happen",
)

# pylint: disable=no-self-use,unused-argument
def handle(self, *args, **options):
"""run deduplications"""
dedupe_model(models.Edition)
dedupe_model(models.Work)
dedupe_model(models.Author)
dedupe_model(models.Edition, dry_run=options["dry_run"])
dedupe_model(models.Work, dry_run=options["dry_run"])
dedupe_model(models.Author, dry_run=options["dry_run"])
11 changes: 9 additions & 2 deletions bookwyrm/management/merge_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ def add_arguments(self, parser):
"""add the arguments for this command"""
parser.add_argument("--canonical", type=int, required=True)
parser.add_argument("--other", type=int, required=True)
parser.add_argument(
"--dry_run",
action="store_true",
help="don't actually merge, only print what would happen",
)

# pylint: disable=no-self-use,unused-argument
def handle(self, *args, **options):
Expand All @@ -25,6 +30,8 @@ def handle(self, *args, **options):
print("other book doesn’t exist!")
return

absorbed_fields = other.merge_into(canonical)
print(f"{other.remote_id} has been merged into {canonical.remote_id}")
absorbed_fields = other.merge_into(canonical, dry_run=options["dry_run"])

action = "would be" if options["dry_run"] else "has been"
print(f"{other.remote_id} {action} merged into {canonical.remote_id}")
print(f"absorbed fields: {absorbed_fields}")
19 changes: 13 additions & 6 deletions bookwyrm/models/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,16 @@ def broadcast(self, activity, sender, software="bookwyrm", **kwargs):
"""only send book data updates to other bookwyrm instances"""
super().broadcast(activity, sender, software=software, **kwargs)

def merge_into(self, canonical: Self) -> Dict[str, Any]:
def merge_into(self, canonical: Self, dry_run=False) -> Dict[str, Any]:
"""merge this entity into another entity"""
if canonical.id == self.id:
raise ValueError(f"Cannot merge {self} into itself")

absorbed_fields = canonical.absorb_data_from(self)
absorbed_fields = canonical.absorb_data_from(self, dry_run=dry_run)

if dry_run:
return absorbed_fields

canonical.save()

self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical)
Expand Down Expand Up @@ -149,7 +153,7 @@ def merge_into(self, canonical: Self) -> Dict[str, Any]:
self.delete()
return absorbed_fields

def absorb_data_from(self, other: Self) -> Dict[str, Any]:
def absorb_data_from(self, other: Self, dry_run=False) -> Dict[str, Any]:
"""fill empty fields with values from another entity"""
absorbed_fields = {}
for data_field in self._meta.get_fields():
Expand All @@ -162,19 +166,22 @@ def absorb_data_from(self, other: Self) -> Dict[str, Any]:
if isinstance(data_field, fields.ArrayField):
if new_values := list(set(other_value) - set(canonical_value)):
# append at the end (in no particular order)
setattr(self, data_field.name, canonical_value + new_values)
if not dry_run:
setattr(self, data_field.name, canonical_value + new_values)
absorbed_fields[data_field.name] = new_values
elif isinstance(data_field, fields.PartialDateField):
if (
(not canonical_value)
or (other_value.has_day and not canonical_value.has_day)
or (other_value.has_month and not canonical_value.has_month)
):
setattr(self, data_field.name, other_value)
if not dry_run:
setattr(self, data_field.name, other_value)
absorbed_fields[data_field.name] = other_value
else:
if not canonical_value:
setattr(self, data_field.name, other_value)
if not dry_run:
setattr(self, data_field.name, other_value)
absorbed_fields[data_field.name] = other_value
return absorbed_fields

Expand Down

0 comments on commit 804e946

Please sign in to comment.