diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py index 74475a00b6..c2d897ce33 100644 --- a/bookwyrm/management/commands/deduplicate_book_data.py +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -6,7 +6,7 @@ from bookwyrm import models -def dedupe_model(model): +def dedupe_model(model, dry_run=False): """combine duplicate editions and update related models""" print(f"deduplicating {model.__name__}:") fields = model._meta.get_fields() @@ -27,10 +27,13 @@ def dedupe_model(model): print("----------") objs = model.objects.filter(**{field.name: value}).order_by("id") canonical = objs.first() - print(f"merging into {canonical.remote_id} based on {field.name} {value}:") + action = "would merge" if dry_run else "merging" + print( + f"{action} into {model.__name__} {canonical.remote_id} based on {field.name} {value}:" + ) for obj in objs[1:]: print(f"- {obj.remote_id}") - absorbed_fields = obj.merge_into(canonical) + absorbed_fields = obj.merge_into(canonical, dry_run=dry_run) print(f" absorbed fields: {absorbed_fields}") @@ -39,9 +42,17 @@ class Command(BaseCommand): help = "merges duplicate book data" + def add_arguments(self, parser): + """add the arguments for this command""" + parser.add_argument( + "--dry_run", + action="store_true", + help="don't actually merge, only print what would happen", + ) + # pylint: disable=no-self-use,unused-argument def handle(self, *args, **options): """run deduplications""" - dedupe_model(models.Edition) - dedupe_model(models.Work) - dedupe_model(models.Author) + dedupe_model(models.Edition, dry_run=options["dry_run"]) + dedupe_model(models.Work, dry_run=options["dry_run"]) + dedupe_model(models.Author, dry_run=options["dry_run"]) diff --git a/bookwyrm/management/merge_command.py b/bookwyrm/management/merge_command.py index 0c464600a7..66e60814ae 100644 --- a/bookwyrm/management/merge_command.py +++ b/bookwyrm/management/merge_command.py @@ -8,6 +8,11 @@ def add_arguments(self, parser): """add the arguments for this command""" parser.add_argument("--canonical", type=int, required=True) parser.add_argument("--other", type=int, required=True) + parser.add_argument( + "--dry_run", + action="store_true", + help="don't actually merge, only print what would happen", + ) # pylint: disable=no-self-use,unused-argument def handle(self, *args, **options): @@ -25,6 +30,8 @@ def handle(self, *args, **options): print("other book doesn’t exist!") return - absorbed_fields = other.merge_into(canonical) - print(f"{other.remote_id} has been merged into {canonical.remote_id}") + absorbed_fields = other.merge_into(canonical, dry_run=options["dry_run"]) + + action = "would be" if options["dry_run"] else "has been" + print(f"{other.remote_id} {action} merged into {canonical.remote_id}") print(f"absorbed fields: {absorbed_fields}") diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index 6074261898..5e46a32451 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -110,12 +110,16 @@ def broadcast(self, activity, sender, software="bookwyrm", **kwargs): """only send book data updates to other bookwyrm instances""" super().broadcast(activity, sender, software=software, **kwargs) - def merge_into(self, canonical: Self) -> Dict[str, Any]: + def merge_into(self, canonical: Self, dry_run=False) -> Dict[str, Any]: """merge this entity into another entity""" if canonical.id == self.id: raise ValueError(f"Cannot merge {self} into itself") - absorbed_fields = canonical.absorb_data_from(self) + absorbed_fields = canonical.absorb_data_from(self, dry_run=dry_run) + + if dry_run: + return absorbed_fields + canonical.save() self.merged_model.objects.create(deleted_id=self.id, merged_into=canonical) @@ -149,7 +153,7 @@ def merge_into(self, canonical: Self) -> Dict[str, Any]: self.delete() return absorbed_fields - def absorb_data_from(self, other: Self) -> Dict[str, Any]: + def absorb_data_from(self, other: Self, dry_run=False) -> Dict[str, Any]: """fill empty fields with values from another entity""" absorbed_fields = {} for data_field in self._meta.get_fields(): @@ -162,7 +166,8 @@ def absorb_data_from(self, other: Self) -> Dict[str, Any]: if isinstance(data_field, fields.ArrayField): if new_values := list(set(other_value) - set(canonical_value)): # append at the end (in no particular order) - setattr(self, data_field.name, canonical_value + new_values) + if not dry_run: + setattr(self, data_field.name, canonical_value + new_values) absorbed_fields[data_field.name] = new_values elif isinstance(data_field, fields.PartialDateField): if ( @@ -170,11 +175,13 @@ def absorb_data_from(self, other: Self) -> Dict[str, Any]: or (other_value.has_day and not canonical_value.has_day) or (other_value.has_month and not canonical_value.has_month) ): - setattr(self, data_field.name, other_value) + if not dry_run: + setattr(self, data_field.name, other_value) absorbed_fields[data_field.name] = other_value else: if not canonical_value: - setattr(self, data_field.name, other_value) + if not dry_run: + setattr(self, data_field.name, other_value) absorbed_fields[data_field.name] = other_value return absorbed_fields