From ad0ada2b143e2e0c95fd00d5018f15976795c55a Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Mon, 22 Jul 2024 10:21:50 +1000 Subject: [PATCH 01/16] fix key errors in import job for statuses without an author --- bookwyrm/models/bookwyrm_import_job.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index 5229430eb0..187c71c588 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -182,7 +182,7 @@ def upsert_statuses(user, cls, data, book_remote_id): for status in data: if is_alias( - user, status["attributedTo"] + user, status.get("attributedTo", False) ): # don't let l33t hax0rs steal other people's posts # update ids and remove replies status["attributedTo"] = user.remote_id @@ -215,7 +215,9 @@ def upsert_statuses(user, cls, data, book_remote_id): instance.save() # save and broadcast else: - logger.warning("User does not have permission to import statuses") + logger.warning( + "User does not have permission to import statuses, or status is tombstone" + ) def upsert_lists(user, lists, book_id): @@ -436,16 +438,19 @@ def is_alias(user, remote_id): """check that the user is listed as movedTo or also_known_as in the remote user's profile""" + if not remote_id: + return False + remote_user = activitypub.resolve_remote_id( remote_id=remote_id, model=models.User, save=False ) if remote_user: - if remote_user.moved_to: + if hasattr(remote_user, "moved_to"): return user.remote_id == remote_user.moved_to - if remote_user.also_known_as: + if hasattr(remote_user, "also_known_as"): return user in remote_user.also_known_as.all() return False From b893e93dce260cea8e9ffb4f7753813733db2394 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sun, 18 Aug 2024 14:51:40 +1000 Subject: [PATCH 02/16] add subtasks for user import --- bookwyrm/importers/bookwyrm_import.py | 3 + bookwyrm/models/bookwyrm_import_job.py | 312 ++++++++++++++----------- 2 files changed, 181 insertions(+), 134 deletions(-) diff --git a/bookwyrm/importers/bookwyrm_import.py b/bookwyrm/importers/bookwyrm_import.py index 206cd62197..b82163c85b 100644 --- a/bookwyrm/importers/bookwyrm_import.py +++ b/bookwyrm/importers/bookwyrm_import.py @@ -21,4 +21,7 @@ def process_import( job = BookwyrmImportJob.objects.create( user=user, archive_file=archive_file, required=required ) + + # TODO need to read the tarfile here and create a childjob for each book + return job diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index 187c71c588..ba7301aabe 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -3,15 +3,16 @@ import json import logging -from django.db.models import FileField, JSONField, CharField +from django.db.models import FileField, JSONField, CharField, TextChoices, PROTECT from django.utils import timezone from django.utils.html import strip_tags +from django.utils.translation import gettext_lazy as _ from django.contrib.postgres.fields import ArrayField as DjangoArrayField from bookwyrm import activitypub from bookwyrm import models from bookwyrm.tasks import app, IMPORTS -from bookwyrm.models.job import ParentJob, ParentTask, SubTask +from bookwyrm.models.job import ParentJob, ChildJob, ParentTask, SubTask from bookwyrm.utils.tar import BookwyrmTarFile logger = logging.getLogger(__name__) @@ -26,12 +27,66 @@ class BookwyrmImportJob(ParentJob): def start_job(self): """Start the job""" - start_import_task.delay(job_id=self.id, no_children=True) + start_import_task.delay(job_id=self.id) + + @property + def item_count(self): + """How many tasks are there?""" + return self.items.count() + + @property + def pending_item_count(self): + """How many tasks are incomplete?""" + status = BookwyrmImportJob.Status + return self.items.filter(fail_reason__isnull=True, status__in=[status.PENDING, status.ACTIVE]) + + @property + def percent_complete(self): + """How far along?""" + item_count = self.item_count + if not item_count: + return 0 + return math.floor((item_count - self.pending_item_count) / item_count * 100) + +class UserImportBook(ChildJob): + """ ChildJob to import each book. + Equivalent to ImportItem when importing a csv file of books """ + + book_data = JSONField(null=False) + + def start_job(self): + """Start the job""" + import_book_task.delay(task_id=self.id) + + +class UserImportStatuses(ChildJob): + """ ChildJob for comments, quotes, and reviews """ + + class StatusType(TextChoices): + """Possible status types.""" + + COMMENT = "comment", _("Comment") + REVIEW = "review", _("Review") + QUOTE = "quote", _("Quotation") + + json = JSONField(null=False) + book = models.fields.ForeignKey( + "Edition", on_delete=PROTECT, activitypub_field="inReplyToBook" + ) + status_type = CharField( + max_length=10, choices=StatusType.choices, default=StatusType.COMMENT, null=True + ) + + def start_job(self): + """Start the job""" + upsert_statuses_task.delay(task_id=self.id) @app.task(queue=IMPORTS, base=ParentTask) def start_import_task(**kwargs): - """trigger the child import tasks for each user data""" + """trigger the child import tasks for each user data + We always import the books even if not assigning + them to shelves, lists etc""" job = BookwyrmImportJob.objects.get(id=kwargs["job_id"]) archive_file = job.archive_file @@ -60,9 +115,13 @@ def start_import_task(**kwargs): if "include_blocks" in job.required: upsert_user_blocks(job.user, job.import_data.get("blocks", [])) - process_books(job, tar) + for data in job.import_data.get("books"): + + book_job = UserImportBook.objects.create(parent_job=job, book_data=data) + book_job.parent_job = job + book_job.start_job() - job.set_status("complete") + #job.set_status("complete") # TODO: is this needed? Don't we want it to be "active"? archive_file.close() except Exception as err: # pylint: disable=broad-except @@ -70,46 +129,17 @@ def start_import_task(**kwargs): job.set_status("failed") -def process_books(job, tar): - """ - Process user import data related to books - We always import the books even if not assigning - them to shelves, lists etc - """ - - books = job.import_data.get("books") - - for data in books: - book = get_or_create_edition(data, tar) - - if "include_shelves" in job.required: - upsert_shelves(book, job.user, data) +# book-related updates +###################### - if "include_readthroughs" in job.required: - upsert_readthroughs(data.get("readthroughs"), job.user, book.id) - - if "include_comments" in job.required: - upsert_statuses( - job.user, models.Comment, data.get("comments"), book.remote_id - ) - if "include_quotations" in job.required: - upsert_statuses( - job.user, models.Quotation, data.get("quotations"), book.remote_id - ) - - if "include_reviews" in job.required: - upsert_statuses( - job.user, models.Review, data.get("reviews"), book.remote_id - ) +@app.task(queue=IMPORTS, base=SubTask) +def import_book_task(task_id): + """Take a JSON string of work and edition data, + find or create the edition and work in the database""" - if "include_lists" in job.required: - upsert_lists(job.user, data.get("lists"), book.id) + task = UserImportBook.objects.get(id=task_id) - -def get_or_create_edition(book_data, tar): - """Take a JSON string of work and edition data, - find or create the edition and work in the database and - return an edition instance""" + # TODO: use try/except and mark item failed on except edition = book_data.get("edition") existing = models.Edition.find_existing(edition) @@ -147,10 +177,109 @@ def get_or_create_edition(book_data, tar): # set the cover image from the tar if cover_path: - tar.write_image_to_file(cover_path, book.cover) + tar.write_image_to_file(cover_path, book.cover) # TODO: open tar file here + + required = task.parent_job.required + task_user = task.parent_job.user + + if "include_shelves" in required: + upsert_shelves(book, task_user, book_data) # TODO: could this be book.id? + + if "include_readthroughs" in required: + upsert_readthroughs(book_data.get("readthroughs"), task_user, book.id) + + if "include_lists" in required: + upsert_lists(task_user, book_data.get("lists"), book.id) - return book + # Now import statuses + # These are also subtasks so that we can isolate anything that fails + if "include_comments" in job.required: + + for status in book_data.get("comments"): + UserImportStatuses.objects.create( + parent_job=task.parent_job, + json=status, + book=book, + status_type=UserImportStatuses.StatusType.COMMENT + ) + + if "include_quotations" in job.required: + # job.user, models.Quotation, data.get("quotations"), book.remote_id + + for status in book_data.get("quotations"): + UserImportStatuses.objects.create( + parent_job=task.parent_job, + json=status, + book=book, + status_type=UserImportStatuses.StatusType.QUOTE + ) + + if "include_reviews" in job.required: + # job.user, models.Review, data.get("reviews"), book.remote_id + for status in book_data.get("reviews"): + UserImportStatuses.objects.create( + parent_job=task.parent_job, + json=status, + book=book, + status_type=UserImportStatuses.StatusType.REVIEW + ) + + for item in UserImportStatuses.objects.get(parent_job=task.parent_job): + item.start_job() + + task.complete_job() + + +@app.task(queue=IMPORTS, base=SubTask) +def upsert_statuses_task(task_id): +# def upsert_statuses_task(user, cls, data, book_remote_id): + """Find or create book statuses""" + + task = UserImportStatuses.objects.get(id=task_id) + user = task.parent_job.user + status_class = models.Review if self.StatusType.REVIEW else models.Quotation if self.StatusType.QUOTE else models.Comment + + if is_alias( + user, status.get("attributedTo", False) + ): # don't let l33t hax0rs steal other people's posts + # update ids and remove replies + status["attributedTo"] = user.remote_id + status["to"] = update_followers_address(user, status["to"]) + status["cc"] = update_followers_address(user, status["cc"]) + status[ + "replies" + ] = ( + {} + ) # this parses incorrectly but we can't set it without knowing the new id + status["inReplyToBook"] = task.book.remote_id # TODO: what if there isn't a remote id? + parsed = activitypub.parse(status) + if not status_already_exists( + user, parsed + ): # don't duplicate posts on multiple import + + instance = parsed.to_model(model=status_class, save=True, overwrite=True) + + for val in [ + "progress", + "progress_mode", + "position", + "endposition", + "position_mode", + ]: + if status.get(val): + instance.val = status[val] + + instance.remote_id = instance.get_remote_id() # update the remote_id + instance.save() # save and broadcast + + task.complete_job() + + else: + logger.warning( + "User does not have permission to import statuses, or status is tombstone" + ) + task.stop_job(reason="failed") def upsert_readthroughs(data, user, book_id): """Take a JSON string of readthroughs and @@ -175,51 +304,6 @@ def upsert_readthroughs(data, user, book_id): if not existing: models.ReadThrough.objects.create(**obj) - -def upsert_statuses(user, cls, data, book_remote_id): - """Take a JSON string of a status and - find or create the instances in the database""" - - for status in data: - if is_alias( - user, status.get("attributedTo", False) - ): # don't let l33t hax0rs steal other people's posts - # update ids and remove replies - status["attributedTo"] = user.remote_id - status["to"] = update_followers_address(user, status["to"]) - status["cc"] = update_followers_address(user, status["cc"]) - status[ - "replies" - ] = ( - {} - ) # this parses incorrectly but we can't set it without knowing the new id - status["inReplyToBook"] = book_remote_id - parsed = activitypub.parse(status) - if not status_already_exists( - user, parsed - ): # don't duplicate posts on multiple import - - instance = parsed.to_model(model=cls, save=True, overwrite=True) - - for val in [ - "progress", - "progress_mode", - "position", - "endposition", - "position_mode", - ]: - if status.get(val): - instance.val = status[val] - - instance.remote_id = instance.get_remote_id() # update the remote_id - instance.save() # save and broadcast - - else: - logger.warning( - "User does not have permission to import statuses, or status is tombstone" - ) - - def upsert_lists(user, lists, book_id): """Take a list of objects each containing a list and list item as AP objects @@ -276,6 +360,8 @@ def upsert_shelves(book, user, book_data): book=book, shelf=book_shelf, user=user, shelved_date=timezone.now() ) +# user updates +############## def update_user_profile(user, tar, data): """update the user's profile from import data""" @@ -317,14 +403,6 @@ def update_user_settings(user, data): user.save(update_fields=update_fields) -@app.task(queue=IMPORTS, base=SubTask) -def update_user_settings_task(job_id): - """wrapper task for user's settings import""" - parent_job = BookwyrmImportJob.objects.get(id=job_id) - - return update_user_settings(parent_job.user, parent_job.import_data.get("user")) - - def update_goals(user, data): """update the user's goals from import data""" @@ -342,14 +420,6 @@ def update_goals(user, data): models.AnnualGoal.objects.create(**goal) -@app.task(queue=IMPORTS, base=SubTask) -def update_goals_task(job_id): - """wrapper task for user's goals import""" - parent_job = BookwyrmImportJob.objects.get(id=job_id) - - return update_goals(parent_job.user, parent_job.import_data.get("goals")) - - def upsert_saved_lists(user, values): """Take a list of remote ids and add as saved lists""" @@ -359,16 +429,6 @@ def upsert_saved_lists(user, values): user.saved_lists.add(book_list) -@app.task(queue=IMPORTS, base=SubTask) -def upsert_saved_lists_task(job_id): - """wrapper task for user's saved lists import""" - parent_job = BookwyrmImportJob.objects.get(id=job_id) - - return upsert_saved_lists( - parent_job.user, parent_job.import_data.get("saved_lists") - ) - - def upsert_follows(user, values): """Take a list of remote ids and add as follows""" @@ -386,14 +446,6 @@ def upsert_follows(user, values): follow_request.save() -@app.task(queue=IMPORTS, base=SubTask) -def upsert_follows_task(job_id): - """wrapper task for user's follows import""" - parent_job = BookwyrmImportJob.objects.get(id=job_id) - - return upsert_follows(parent_job.user, parent_job.import_data.get("follows")) - - def upsert_user_blocks(user, user_ids): """block users""" @@ -412,16 +464,8 @@ def upsert_user_blocks(user, user_ids): # remove the blocked user from all blocker's owned groups models.GroupMember.remove(user, user_object) - -@app.task(queue=IMPORTS, base=SubTask) -def upsert_user_blocks_task(job_id): - """wrapper task for user's blocks import""" - parent_job = BookwyrmImportJob.objects.get(id=job_id) - - return upsert_user_blocks( - parent_job.user, parent_job.import_data.get("blocked_users") - ) - +# utilities +########### def update_followers_address(user, field): """statuses to or cc followers need to have the followers From 28caccaca55104344e7561ca7f9a1162d577a5a2 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sun, 18 Aug 2024 14:53:50 +1000 Subject: [PATCH 03/16] formatting --- bookwyrm/models/bookwyrm_import_job.py | 60 ++++++++++++++++---------- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index ba7301aabe..dfeed65609 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -38,7 +38,9 @@ def item_count(self): def pending_item_count(self): """How many tasks are incomplete?""" status = BookwyrmImportJob.Status - return self.items.filter(fail_reason__isnull=True, status__in=[status.PENDING, status.ACTIVE]) + return self.items.filter( + fail_reason__isnull=True, status__in=[status.PENDING, status.ACTIVE] + ) @property def percent_complete(self): @@ -48,9 +50,10 @@ def percent_complete(self): return 0 return math.floor((item_count - self.pending_item_count) / item_count * 100) + class UserImportBook(ChildJob): - """ ChildJob to import each book. - Equivalent to ImportItem when importing a csv file of books """ + """ChildJob to import each book. + Equivalent to ImportItem when importing a csv file of books""" book_data = JSONField(null=False) @@ -60,7 +63,7 @@ def start_job(self): class UserImportStatuses(ChildJob): - """ ChildJob for comments, quotes, and reviews """ + """ChildJob for comments, quotes, and reviews""" class StatusType(TextChoices): """Possible status types.""" @@ -85,8 +88,8 @@ def start_job(self): @app.task(queue=IMPORTS, base=ParentTask) def start_import_task(**kwargs): """trigger the child import tasks for each user data - We always import the books even if not assigning - them to shelves, lists etc""" + We always import the books even if not assigning + them to shelves, lists etc""" job = BookwyrmImportJob.objects.get(id=kwargs["job_id"]) archive_file = job.archive_file @@ -121,7 +124,7 @@ def start_import_task(**kwargs): book_job.parent_job = job book_job.start_job() - #job.set_status("complete") # TODO: is this needed? Don't we want it to be "active"? + # job.set_status("complete") # TODO: is this needed? Don't we want it to be "active"? archive_file.close() except Exception as err: # pylint: disable=broad-except @@ -132,6 +135,7 @@ def start_import_task(**kwargs): # book-related updates ###################### + @app.task(queue=IMPORTS, base=SubTask) def import_book_task(task_id): """Take a JSON string of work and edition data, @@ -177,13 +181,13 @@ def import_book_task(task_id): # set the cover image from the tar if cover_path: - tar.write_image_to_file(cover_path, book.cover) # TODO: open tar file here + tar.write_image_to_file(cover_path, book.cover) # TODO: open tar file here required = task.parent_job.required task_user = task.parent_job.user if "include_shelves" in required: - upsert_shelves(book, task_user, book_data) # TODO: could this be book.id? + upsert_shelves(book, task_user, book_data) # TODO: could this be book.id? if "include_readthroughs" in required: upsert_readthroughs(book_data.get("readthroughs"), task_user, book.id) @@ -201,19 +205,19 @@ def import_book_task(task_id): parent_job=task.parent_job, json=status, book=book, - status_type=UserImportStatuses.StatusType.COMMENT - ) + status_type=UserImportStatuses.StatusType.COMMENT, + ) if "include_quotations" in job.required: - # job.user, models.Quotation, data.get("quotations"), book.remote_id + # job.user, models.Quotation, data.get("quotations"), book.remote_id for status in book_data.get("quotations"): UserImportStatuses.objects.create( parent_job=task.parent_job, json=status, book=book, - status_type=UserImportStatuses.StatusType.QUOTE - ) + status_type=UserImportStatuses.StatusType.QUOTE, + ) if "include_reviews" in job.required: # job.user, models.Review, data.get("reviews"), book.remote_id @@ -222,8 +226,8 @@ def import_book_task(task_id): parent_job=task.parent_job, json=status, book=book, - status_type=UserImportStatuses.StatusType.REVIEW - ) + status_type=UserImportStatuses.StatusType.REVIEW, + ) for item in UserImportStatuses.objects.get(parent_job=task.parent_job): item.start_job() @@ -233,12 +237,18 @@ def import_book_task(task_id): @app.task(queue=IMPORTS, base=SubTask) def upsert_statuses_task(task_id): -# def upsert_statuses_task(user, cls, data, book_remote_id): + # def upsert_statuses_task(user, cls, data, book_remote_id): """Find or create book statuses""" task = UserImportStatuses.objects.get(id=task_id) user = task.parent_job.user - status_class = models.Review if self.StatusType.REVIEW else models.Quotation if self.StatusType.QUOTE else models.Comment + status_class = ( + models.Review + if self.StatusType.REVIEW + else models.Quotation + if self.StatusType.QUOTE + else models.Comment + ) if is_alias( user, status.get("attributedTo", False) @@ -249,10 +259,10 @@ def upsert_statuses_task(task_id): status["cc"] = update_followers_address(user, status["cc"]) status[ "replies" - ] = ( - {} - ) # this parses incorrectly but we can't set it without knowing the new id - status["inReplyToBook"] = task.book.remote_id # TODO: what if there isn't a remote id? + ] = {} # this parses incorrectly but we can't set it without knowing the new id + status[ + "inReplyToBook" + ] = task.book.remote_id # TODO: what if there isn't a remote id? parsed = activitypub.parse(status) if not status_already_exists( user, parsed @@ -281,6 +291,7 @@ def upsert_statuses_task(task_id): ) task.stop_job(reason="failed") + def upsert_readthroughs(data, user, book_id): """Take a JSON string of readthroughs and find or create the instances in the database""" @@ -304,6 +315,7 @@ def upsert_readthroughs(data, user, book_id): if not existing: models.ReadThrough.objects.create(**obj) + def upsert_lists(user, lists, book_id): """Take a list of objects each containing a list and list item as AP objects @@ -360,9 +372,11 @@ def upsert_shelves(book, user, book_data): book=book, shelf=book_shelf, user=user, shelved_date=timezone.now() ) + # user updates ############## + def update_user_profile(user, tar, data): """update the user's profile from import data""" name = data.get("name", None) @@ -464,9 +478,11 @@ def upsert_user_blocks(user, user_ids): # remove the blocked user from all blocker's owned groups models.GroupMember.remove(user, user_object) + # utilities ########### + def update_followers_address(user, field): """statuses to or cc followers need to have the followers address updated to the new local user""" From 9f1c023b9e9e294be8a871a20a7b6845284e90a2 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Mon, 26 Aug 2024 20:17:52 +1000 Subject: [PATCH 04/16] refactor user imports - Refactors BookwyrmImportJob to be staged into three sets of subtasks. - Adds more robust error checking - Fixes bug in check for moved_to value - Fixes but where status Tombstones caused imports to fail - Improves user import job interface to provide more information matching csv imports --- .../0208_userrelationshipimport_and_more.py | 132 +++++ bookwyrm/models/__init__.py | 2 +- bookwyrm/models/bookwyrm_import_job.py | 469 ++++++++++++------ bookwyrm/models/job.py | 11 +- bookwyrm/templates/import/import_user.html | 4 +- .../templates/import/user_import_status.html | 206 ++++++++ bookwyrm/urls.py | 10 + bookwyrm/views/__init__.py | 8 +- bookwyrm/views/imports/import_status.py | 79 +++ 9 files changed, 750 insertions(+), 171 deletions(-) create mode 100644 bookwyrm/migrations/0208_userrelationshipimport_and_more.py create mode 100644 bookwyrm/templates/import/user_import_status.html diff --git a/bookwyrm/migrations/0208_userrelationshipimport_and_more.py b/bookwyrm/migrations/0208_userrelationshipimport_and_more.py new file mode 100644 index 0000000000..03a8aa42bd --- /dev/null +++ b/bookwyrm/migrations/0208_userrelationshipimport_and_more.py @@ -0,0 +1,132 @@ +# Generated by Django 4.2.15 on 2024-08-26 10:16 + +import bookwyrm.models.fields +import django.contrib.postgres.fields +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0207_sqlparse_update"), + ] + + operations = [ + migrations.CreateModel( + name="UserRelationshipImport", + fields=[ + ( + "childjob_ptr", + models.OneToOneField( + auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to="bookwyrm.childjob", + ), + ), + ( + "relationship", + bookwyrm.models.fields.CharField( + choices=[("follow", "Follow"), ("block", "Block")], + max_length=10, + null=True, + ), + ), + ( + "remote_id", + bookwyrm.models.fields.RemoteIdField( + max_length=255, + null=True, + validators=[bookwyrm.models.fields.validate_remote_id], + ), + ), + ], + options={ + "abstract": False, + }, + bases=("bookwyrm.childjob",), + ), + migrations.AlterField( + model_name="bookwyrmimportjob", + name="required", + field=django.contrib.postgres.fields.ArrayField( + base_field=bookwyrm.models.fields.CharField(blank=True, max_length=50), + blank=True, + size=None, + ), + ), + migrations.CreateModel( + name="UserImportPost", + fields=[ + ( + "childjob_ptr", + models.OneToOneField( + auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to="bookwyrm.childjob", + ), + ), + ("json", models.JSONField()), + ( + "status_type", + bookwyrm.models.fields.CharField( + choices=[ + ("comment", "Comment"), + ("review", "Review"), + ("quote", "Quotation"), + ], + default="comment", + max_length=10, + null=True, + ), + ), + ( + "book", + bookwyrm.models.fields.ForeignKey( + on_delete=django.db.models.deletion.PROTECT, + to="bookwyrm.edition", + ), + ), + ], + options={ + "abstract": False, + }, + bases=("bookwyrm.childjob",), + ), + migrations.CreateModel( + name="UserImportBook", + fields=[ + ( + "childjob_ptr", + models.OneToOneField( + auto_created=True, + on_delete=django.db.models.deletion.CASCADE, + parent_link=True, + primary_key=True, + serialize=False, + to="bookwyrm.childjob", + ), + ), + ("book_data", models.JSONField()), + ( + "book", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + to="bookwyrm.book", + ), + ), + ], + options={ + "abstract": False, + }, + bases=("bookwyrm.childjob",), + ), + ] diff --git a/bookwyrm/models/__init__.py b/bookwyrm/models/__init__.py index 6bb99c7f25..5d197661f6 100644 --- a/bookwyrm/models/__init__.py +++ b/bookwyrm/models/__init__.py @@ -26,7 +26,7 @@ from .group import Group, GroupMember, GroupMemberInvitation from .import_job import ImportJob, ImportItem -from .bookwyrm_import_job import BookwyrmImportJob +from .bookwyrm_import_job import BookwyrmImportJob, UserImportBook, UserImportPost from .bookwyrm_export_job import BookwyrmExportJob from .move import MoveUser diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index dfeed65609..61c2548939 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -2,8 +2,18 @@ import json import logging - -from django.db.models import FileField, JSONField, CharField, TextChoices, PROTECT +import math + +from django.db.models import ( + ForeignKey, + FileField, + JSONField, + TextChoices, + CASCADE, + PROTECT, + SET_NULL, +) +from django.db.utils import IntegrityError from django.utils import timezone from django.utils.html import strip_tags from django.utils.translation import gettext_lazy as _ @@ -23,24 +33,51 @@ class BookwyrmImportJob(ParentJob): archive_file = FileField(null=True, blank=True) import_data = JSONField(null=True) - required = DjangoArrayField(CharField(max_length=50, blank=True), blank=True) + required = DjangoArrayField( + models.fields.CharField(max_length=50, blank=True), blank=True + ) def start_job(self): """Start the job""" start_import_task.delay(job_id=self.id) + @property + def book_tasks(self): + """How many import book tasks are there?""" + return UserImportBook.objects.filter(parent_job=self).all() + + @property + def status_tasks(self): + """How many import status tasks are there?""" + return UserImportPost.objects.filter(parent_job=self).all() + + @property + def relationship_tasks(self): + """How many import relationship tasks are there?""" + return UserRelationshipImport.objects.filter(parent_job=self).all() + @property def item_count(self): - """How many tasks are there?""" - return self.items.count() + """How many total tasks are there?""" + return self.book_tasks.count() + self.status_tasks.count() @property def pending_item_count(self): """How many tasks are incomplete?""" status = BookwyrmImportJob.Status - return self.items.filter( - fail_reason__isnull=True, status__in=[status.PENDING, status.ACTIVE] - ) + book_tasks = self.book_tasks.filter( + status__in=[status.PENDING, status.ACTIVE] + ).count() + + status_tasks = self.status_tasks.filter( + status__in=[status.PENDING, status.ACTIVE] + ).count() + + relationship_tasks = self.relationship_tasks.filter( + status__in=[status.PENDING, status.ACTIVE] + ).count() + + return book_tasks + status_tasks + relationship_tasks @property def percent_complete(self): @@ -55,14 +92,15 @@ class UserImportBook(ChildJob): """ChildJob to import each book. Equivalent to ImportItem when importing a csv file of books""" + book = ForeignKey(models.Book, on_delete=SET_NULL, null=True, blank=True) book_data = JSONField(null=False) def start_job(self): """Start the job""" - import_book_task.delay(task_id=self.id) + import_book_task.delay(child_id=self.id) -class UserImportStatuses(ChildJob): +class UserImportPost(ChildJob): """ChildJob for comments, quotes, and reviews""" class StatusType(TextChoices): @@ -76,13 +114,32 @@ class StatusType(TextChoices): book = models.fields.ForeignKey( "Edition", on_delete=PROTECT, activitypub_field="inReplyToBook" ) - status_type = CharField( + status_type = models.fields.CharField( max_length=10, choices=StatusType.choices, default=StatusType.COMMENT, null=True ) def start_job(self): """Start the job""" - upsert_statuses_task.delay(task_id=self.id) + upsert_statuses_task.delay(child_id=self.id) + + +class UserRelationshipImport(ChildJob): + """ChildJob for follows and blocks""" + + class RelationshipType(TextChoices): + """Possible relationship types.""" + + FOLLOW = "follow", _("Follow") + BLOCK = "block", _("Block") + + relationship = models.fields.CharField( + max_length=10, choices=RelationshipType.choices, null=True + ) + remote_id = models.fields.RemoteIdField(null=True, unique=False) + + def start_job(self): + """Start the job""" + import_user_relationship_task.delay(child_id=self.id) @app.task(queue=IMPORTS, base=ParentTask) @@ -91,12 +148,15 @@ def start_import_task(**kwargs): We always import the books even if not assigning them to shelves, lists etc""" job = BookwyrmImportJob.objects.get(id=kwargs["job_id"]) - archive_file = job.archive_file + archive_file = job.bookwyrmimportjob.archive_file # don't start the job if it was stopped from the UI if job.complete: return + job.status = "active" + job.save(update_fields=["status"]) + try: archive_file.open("rb") with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar: @@ -113,186 +173,220 @@ def start_import_task(**kwargs): update_goals(job.user, job.import_data.get("goals", [])) if "include_saved_lists" in job.required: upsert_saved_lists(job.user, job.import_data.get("saved_lists", [])) + if "include_follows" in job.required: - upsert_follows(job.user, job.import_data.get("follows", [])) + for remote_id in job.import_data.get("follows", []): + UserRelationshipImport.objects.create( + parent_job=job, remote_id=remote_id, relationship="follow" + ) + if "include_blocks" in job.required: - upsert_user_blocks(job.user, job.import_data.get("blocks", [])) + for remote_id in job.import_data.get("blocks", []): + UserRelationshipImport.objects.create( + parent_job=job, remote_id=remote_id, relationship="block" + ) - for data in job.import_data.get("books"): + for item in UserRelationshipImport.objects.filter(parent_job=job).all(): + item.start_job() + for data in job.import_data.get("books"): book_job = UserImportBook.objects.create(parent_job=job, book_data=data) - book_job.parent_job = job book_job.start_job() - # job.set_status("complete") # TODO: is this needed? Don't we want it to be "active"? archive_file.close() + # job.complete_job() except Exception as err: # pylint: disable=broad-except logger.exception("User Import Job %s Failed with error: %s", job.id, err) job.set_status("failed") -# book-related updates -###################### - - @app.task(queue=IMPORTS, base=SubTask) -def import_book_task(task_id): - """Take a JSON string of work and edition data, +def import_book_task(**kwargs): + """Take work and edition data, find or create the edition and work in the database""" - task = UserImportBook.objects.get(id=task_id) + task = UserImportBook.objects.get(id=kwargs["child_id"]) + job = task.parent_job + archive_file = job.bookwyrmimportjob.archive_file + book_data = task.book_data - # TODO: use try/except and mark item failed on except + # don't start the job if it was stopped from the UI + if job.complete or task.complete: + return - edition = book_data.get("edition") - existing = models.Edition.find_existing(edition) - if existing: - return existing + try: + edition = book_data.get("edition") + book = models.Edition.find_existing(edition) + if not book: + # make sure we have the authors in the local DB + # replace the old author ids in the edition JSON + edition["authors"] = [] + for author in book_data.get("authors"): + parsed_author = activitypub.parse(author) + instance = parsed_author.to_model( + model=models.Author, + save=True, + overwrite=True, # TODO: why do we use overwrite? + ) - # make sure we have the authors in the local DB - # replace the old author ids in the edition JSON - edition["authors"] = [] - for author in book_data.get("authors"): - parsed_author = activitypub.parse(author) - instance = parsed_author.to_model( - model=models.Author, save=True, overwrite=True - ) + edition["authors"].append(instance.remote_id) - edition["authors"].append(instance.remote_id) + # we will add the cover later from the tar + # don't try to load it from the old server + cover = edition.get("cover", {}) + cover_path = cover.get("url", None) + edition["cover"] = {} - # we will add the cover later from the tar - # don't try to load it from the old server - cover = edition.get("cover", {}) - cover_path = cover.get("url", None) - edition["cover"] = {} + # first we need the parent work to exist + work = book_data.get("work") + work["editions"] = [] + parsed_work = activitypub.parse(work) + work_instance = parsed_work.to_model( + model=models.Work, save=True, overwrite=True + ) - # first we need the parent work to exist - work = book_data.get("work") - work["editions"] = [] - parsed_work = activitypub.parse(work) - work_instance = parsed_work.to_model(model=models.Work, save=True, overwrite=True) + # now we have a work we can add it to the edition + # and create the edition model instance + edition["work"] = work_instance.remote_id + parsed_edition = activitypub.parse(edition) + book = parsed_edition.to_model( + model=models.Edition, save=True, overwrite=True + ) - # now we have a work we can add it to the edition - # and create the edition model instance - edition["work"] = work_instance.remote_id - parsed_edition = activitypub.parse(edition) - book = parsed_edition.to_model(model=models.Edition, save=True, overwrite=True) + # set the cover image from the tar + # NOTE we don't have the images to go with test json! + # TODO: test this later + if cover_path: + archive_file.open("rb") + with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar: + tar.write_image_to_file(cover_path, book.cover) + archive_file.close() - # set the cover image from the tar - if cover_path: - tar.write_image_to_file(cover_path, book.cover) # TODO: open tar file here + task.book = book + task.save(update_fields=["book"]) + required = task.parent_job.bookwyrmimportjob.required + task_user = task.parent_job.user - required = task.parent_job.required - task_user = task.parent_job.user + if "include_shelves" in required: + upsert_shelves(task_user, book, book_data.get("shelves")) - if "include_shelves" in required: - upsert_shelves(book, task_user, book_data) # TODO: could this be book.id? + if "include_readthroughs" in required: + upsert_readthroughs(task_user, book.id, book_data.get("readthroughs")) - if "include_readthroughs" in required: - upsert_readthroughs(book_data.get("readthroughs"), task_user, book.id) + if "include_lists" in required: + upsert_lists(task_user, book.id, book_data.get("lists")) - if "include_lists" in required: - upsert_lists(task_user, book_data.get("lists"), book.id) + except Exception as err: + logger.exception( + "Book Import Task %s for Job %s Failed with error: %s", task.id, job.id, err + ) + job.set_status("failed") # Now import statuses # These are also subtasks so that we can isolate anything that fails - - if "include_comments" in job.required: - + if "include_comments" in job.bookwyrmimportjob.required: for status in book_data.get("comments"): - UserImportStatuses.objects.create( + UserImportPost.objects.create( parent_job=task.parent_job, json=status, book=book, - status_type=UserImportStatuses.StatusType.COMMENT, + status_type=UserImportPost.StatusType.COMMENT, ) - if "include_quotations" in job.required: - # job.user, models.Quotation, data.get("quotations"), book.remote_id - + if "include_quotations" in job.bookwyrmimportjob.required: for status in book_data.get("quotations"): - UserImportStatuses.objects.create( + UserImportPost.objects.create( parent_job=task.parent_job, json=status, book=book, - status_type=UserImportStatuses.StatusType.QUOTE, + status_type=UserImportPost.StatusType.QUOTE, ) - if "include_reviews" in job.required: - # job.user, models.Review, data.get("reviews"), book.remote_id + if "include_reviews" in job.bookwyrmimportjob.required: for status in book_data.get("reviews"): - UserImportStatuses.objects.create( + UserImportPost.objects.create( parent_job=task.parent_job, json=status, book=book, - status_type=UserImportStatuses.StatusType.REVIEW, + status_type=UserImportPost.StatusType.REVIEW, ) - for item in UserImportStatuses.objects.get(parent_job=task.parent_job): + for item in UserImportPost.objects.filter(parent_job=job).all(): item.start_job() task.complete_job() @app.task(queue=IMPORTS, base=SubTask) -def upsert_statuses_task(task_id): - # def upsert_statuses_task(user, cls, data, book_remote_id): +def upsert_statuses_task(**kwargs): """Find or create book statuses""" - task = UserImportStatuses.objects.get(id=task_id) - user = task.parent_job.user + task = UserImportPost.objects.get(id=kwargs["child_id"]) + job = task.parent_job + user = job.user + status = task.json status_class = ( models.Review - if self.StatusType.REVIEW + if task.StatusType.REVIEW else models.Quotation - if self.StatusType.QUOTE + if task.StatusType.QUOTE else models.Comment ) - if is_alias( - user, status.get("attributedTo", False) - ): # don't let l33t hax0rs steal other people's posts - # update ids and remove replies - status["attributedTo"] = user.remote_id - status["to"] = update_followers_address(user, status["to"]) - status["cc"] = update_followers_address(user, status["cc"]) - status[ - "replies" - ] = {} # this parses incorrectly but we can't set it without knowing the new id - status[ - "inReplyToBook" - ] = task.book.remote_id # TODO: what if there isn't a remote id? - parsed = activitypub.parse(status) - if not status_already_exists( - user, parsed - ): # don't duplicate posts on multiple import - - instance = parsed.to_model(model=status_class, save=True, overwrite=True) - - for val in [ - "progress", - "progress_mode", - "position", - "endposition", - "position_mode", - ]: - if status.get(val): - instance.val = status[val] - - instance.remote_id = instance.get_remote_id() # update the remote_id - instance.save() # save and broadcast - - task.complete_job() - - else: - logger.warning( - "User does not have permission to import statuses, or status is tombstone" - ) - task.stop_job(reason="failed") + # don't start the job if it was stopped from the UI + if job.complete or task.complete: + return + try: + # only add statuses if this is the same user + logger.info("attributedTo: %s ", status.get("attributedTo", False)) + if is_alias(user, status.get("attributedTo", False)): + status["attributedTo"] = user.remote_id + status["to"] = update_followers_address(user, status["to"]) + status["cc"] = update_followers_address(user, status["cc"]) + status[ + "replies" + ] = ( + {} + ) # this parses incorrectly but we can't set it without knowing the new id + status["inReplyToBook"] = task.book.remote_id + parsed = activitypub.parse(status) + if not status_already_exists( + user, parsed + ): # don't duplicate posts on multiple import + + instance = parsed.to_model( + model=status_class, save=True, overwrite=True + ) + + for val in [ + "progress", + "progress_mode", + "position", + "endposition", + "position_mode", + ]: + if status.get(val): + instance.val = status[val] + + instance.remote_id = instance.get_remote_id() # update the remote_id + instance.save() # save and broadcast + + task.complete_job() + + else: + logger.warning( + "User does not have permission to import statuses, or status is tombstone" + ) + task.set_status("failed") -def upsert_readthroughs(data, user, book_id): + except Exception as err: + logger.exception("User Import Job %s Failed with error: %s", task.id, err) + task.set_status("failed") + + +def upsert_readthroughs(user, book_id, data): """Take a JSON string of readthroughs and find or create the instances in the database""" @@ -315,8 +409,14 @@ def upsert_readthroughs(data, user, book_id): if not existing: models.ReadThrough.objects.create(**obj) + return -def upsert_lists(user, lists, book_id): + +def upsert_lists( + user, + book_id, + lists, +): """Take a list of objects each containing a list and list item as AP objects @@ -351,12 +451,13 @@ def upsert_lists(user, lists, book_id): order=count + 1, ) + return + -def upsert_shelves(book, user, book_data): +def upsert_shelves(user, book, shelves): """Take shelf JSON objects and create DB entries if they don't already exist""" - shelves = book_data["shelves"] for shelf in shelves: book_shelf = models.Shelf.objects.filter(name=shelf["name"], user=user).first() @@ -372,6 +473,8 @@ def upsert_shelves(book, user, book_data): book=book, shelf=book_shelf, user=user, shelved_date=timezone.now() ) + return + # user updates ############## @@ -443,40 +546,83 @@ def upsert_saved_lists(user, values): user.saved_lists.add(book_list) -def upsert_follows(user, values): - """Take a list of remote ids and add as follows""" +@app.task(queue=IMPORTS, base=SubTask) +def import_user_relationship_task(**kwargs): + """import a user follow or block from an import file""" - for remote_id in values: - followee = activitypub.resolve_remote_id(remote_id, models.User) - if followee: - (follow_request, created,) = models.UserFollowRequest.objects.get_or_create( - user_subject=user, - user_object=followee, - ) + task = UserRelationshipImport.objects.get(id=kwargs["child_id"]) + job = task.parent_job - if not created: - # this request probably failed to connect with the remote - # and should save to trigger a re-broadcast - follow_request.save() + try: + if task.relationship == "follow": + + followee = activitypub.resolve_remote_id(task.remote_id, models.User) + if followee: + ( + follow_request, + created, + ) = models.UserFollowRequest.objects.get_or_create( + user_subject=job.user, + user_object=followee, + ) + if not created: + # this request probably failed to connect with the remote + # and should save to trigger a re-broadcast + follow_request.save() -def upsert_user_blocks(user, user_ids): - """block users""" + task.complete_job() - for user_id in user_ids: - user_object = activitypub.resolve_remote_id(user_id, models.User) - if user_object: - exists = models.UserBlocks.objects.filter( - user_subject=user, user_object=user_object - ).exists() - if not exists: - models.UserBlocks.objects.create( - user_subject=user, user_object=user_object + else: + logger.exception( + "Could not resolve user %s task %s", task.remote_id, task.id ) - # remove the blocked users's lists from the groups - models.List.remove_from_group(user, user_object) - # remove the blocked user from all blocker's owned groups - models.GroupMember.remove(user, user_object) + task.set_status("failed") + + elif tasks.relationship == "block": + + user_object = activitypub.resolve_remote_id(task.remote_id, models.User) + if user_object: + exists = models.UserBlocks.objects.filter( + user_subject=job.user, user_object=user_object + ).exists() + if not exists: + models.UserBlocks.objects.create( + user_subject=job.user, user_object=user_object + ) + # remove the blocked users's lists from the groups + models.List.remove_from_group(job.user, user_object) + # remove the blocked user from all blocker's owned groups + models.GroupMember.remove(job.user, user_object) + + task.complete_job() + + else: + logger.exception( + "Could not resolve user %s task %s", task.remote_id, task.id + ) + task.set_status("failed") + + else: + logger.exception( + "Invalid relationship %s type specified in task %s", + task.relationship, + task.id, + ) + task.set_status("failed") + + except IntegrityError as err: + # `null value in column "to_user_id" of relation "bookwyrm_user_also_known_as" violates not-null constraint` + # TODO: this seems to indicate that the *alias* doesn't have an ID, which will always be the case + # if we don't have them in our DB already? + # seems like a bug in activitypub.resolve_remote_id? We need to import BOTH users + logger.exception( + "User Import Job %s experienced an IntegrityError: %s", task.id, err + ) + task.set_status("failed") + except Exception as err: + logger.exception("User Import Job %s Failed with error: %s", task.id, err) + task.set_status("failed") # utilities @@ -495,8 +641,8 @@ def update_followers_address(user, field): def is_alias(user, remote_id): - """check that the user is listed as movedTo or also_known_as - in the remote user's profile""" + """check that the user is listed as moved_to + or also_known_as in the remote user's profile""" if not remote_id: return False @@ -506,8 +652,7 @@ def is_alias(user, remote_id): ) if remote_user: - - if hasattr(remote_user, "moved_to"): + if getattr(remote_user, "moved_to", None) is not None: return user.remote_id == remote_user.moved_to if hasattr(remote_user, "also_known_as"): diff --git a/bookwyrm/models/job.py b/bookwyrm/models/job.py index 5a26535718..4d25c14404 100644 --- a/bookwyrm/models/job.py +++ b/bookwyrm/models/job.py @@ -133,7 +133,8 @@ def __terminate_pending_child_jobs(self): tasks = self.pending_child_jobs.filter(task_id__isnull=False).values_list( "task_id", flat=True ) - app.control.revoke(list(tasks)) + tasklist = [str(task) for task in list(tasks)] + app.control.revoke(tasklist) self.pending_child_jobs.update(status=self.Status.STOPPED) @@ -208,7 +209,7 @@ def before_start( job.task_id = task_id job.save(update_fields=["task_id"]) - if kwargs["no_children"]: + if kwargs.get("no_children"): job.set_status(ChildJob.Status.ACTIVE) def on_success( @@ -233,7 +234,7 @@ def on_success( None: The return value of this handler is ignored. """ - if kwargs["no_children"]: + if kwargs.get("no_children"): job = ParentJob.objects.get(id=kwargs["job_id"]) job.complete_job() @@ -247,7 +248,7 @@ class SubTask(app.Task): """ def before_start( - self, task_id, *args, **kwargs + self, task_id, args, kwargs ): # pylint: disable=no-self-use, unused-argument """Handler called before the task starts. Override. @@ -271,7 +272,7 @@ def before_start( child_job.set_status(ChildJob.Status.ACTIVE) def on_success( - self, retval, task_id, *args, **kwargs + self, retval, task_id, args, kwargs ): # pylint: disable=no-self-use, unused-argument """Run by the worker if the task executes successfully. Override. diff --git a/bookwyrm/templates/import/import_user.html b/bookwyrm/templates/import/import_user.html index 70b21673cc..9009724166 100644 --- a/bookwyrm/templates/import/import_user.html +++ b/bookwyrm/templates/import/import_user.html @@ -186,10 +186,10 @@

{% trans "Recent Imports" %}

{% endif %} {% for job in jobs %} + {{ job.created_date }} -

{{ job.created_date }}

+

{{ job.updated_date }}

- {{ job.updated_date }} +

+ {% block page_title %} + {% if job.retry %} + {% trans "Retry Status" %} + {% else %} + {% trans "User Import Status" %} + {% endif %} + {% endblock %} +

+ + + +
+
+
{% trans "Import started:" %}
+
{{ job.created_date | naturaltime }}
+
Import Job Status:
+
+ + {% if job.status %} + {{ job.status }} + {{ job.status_display }} + {% elif job.complete %} + {% trans "Complete" %} + {% else %} + {% trans "Active" %} + {% endif %} + +
+
{% trans "Books" %}:
+
+ {% blocktrans %}total {{ book_jobs_count }}, failed {{ failed_books_count }}{% endblocktrans %} +
+
{% trans "Statuses" %}:
+
+ {% blocktrans %}total {{ status_jobs_count }}, failed {{ failed_statuses_count }}{% endblocktrans %} +
+
{% trans "Follows & Blocks" %}:
+
+ {% blocktrans %}total {{ relationship_jobs_count }}, failed {{ failed_relationships_count }}{% endblocktrans %} +
+
+
+ + {% if job.status == "active" and show_progress %} +
+
+ + {% trans "In progress" %} + + {% trans "Refresh" %} + +
+
+ + {{ percent }} % + + {{ percent }}% +
+
+ {% endif %} + + {% if not job.complete %} +
+ {% csrf_token %} + +
+ {% endif %} + + {% if job.complete and fail_count and not job.retry and not legacy %} +
+ {% blocktrans trimmed count counter=fail_count with display_counter=fail_count|intcomma %} + {{ display_counter }} item failed to import. + {% plural %} + {{ display_counter }} items failed to import. + {% endblocktrans %} + + {% trans "View and troubleshoot failed items" %} + +
+ {% endif %} + + +
+ {% block actions %}{% endblock %} +
+ + + + + + {% block import_cols_headers %} + + + {% endblock %} + + {% if not items %} + + + + {% else %} + {% for item in items %} + + + + + {% block import_cols %} + + + {% endblock %} + + {% block action_row %}{% endblock %} + {% endfor %} + {% endif %} +
+ {% trans "Title" %} + + {% trans "ISBN" %} + + {% trans "Authors" %} + + {% trans "Book" %} + + {% trans "Status" %} +
+ {% trans "No items currently need review" %} +
+ {{ item.book_data.edition.title }} + + {{ item.book_data.edition.isbn13|default:'' }} + + + {% for author in item.book_data.authors %} +

{{ author.name }}

+ {% endfor %} +
+ {% if item.book %} + + {% include 'snippets/book_cover.html' with book=item.book cover_class='is-h-s' size='small' %} + + {% endif %} + + {% if item.book %} + + {% trans "Imported" %} + {% else %} +
+ {{ item.status }} + {# retry option if an item appears to be hanging #} + {% if job.created_date != job.updated_date and inactive_time > 24 %} +
+ {% csrf_token %} + +
+ {% endif %} +
+ {% endif %} +
+
+
+ +
+ {% include 'snippets/pagination.html' with page=items path=page_path %} +
+{% endspaceless %}{% endblock %} diff --git a/bookwyrm/urls.py b/bookwyrm/urls.py index cd75eb0c02..8010bd0932 100644 --- a/bookwyrm/urls.py +++ b/bookwyrm/urls.py @@ -434,6 +434,11 @@ # imports re_path(r"^import/?$", views.Import.as_view(), name="import"), re_path(r"^user-import/?$", views.UserImport.as_view(), name="user-import"), + re_path( + r"^user-import/(?P\d+)/?$", + views.UserImportStatus.as_view(), + name="user-import-status", + ), re_path( r"^import/(?P\d+)/?$", views.ImportStatus.as_view(), @@ -444,6 +449,11 @@ views.stop_import, name="import-stop", ), + re_path( + r"^user-import/(?P\d+)/stop/?$", + views.user_stop_import, + name="user-import-stop", + ), re_path( r"^import/(?P\d+)/retry/(?P\d+)/?$", views.retry_item, diff --git a/bookwyrm/views/__init__.py b/bookwyrm/views/__init__.py index ebc851847a..c7730f7464 100644 --- a/bookwyrm/views/__init__.py +++ b/bookwyrm/views/__init__.py @@ -87,7 +87,13 @@ # csv import from .imports.import_data import Import, UserImport -from .imports.import_status import ImportStatus, retry_item, stop_import +from .imports.import_status import ( + ImportStatus, + UserImportStatus, + retry_item, + stop_import, + user_stop_import, +) from .imports.troubleshoot import ImportTroubleshoot from .imports.manually_review import ( ImportManualReview, diff --git a/bookwyrm/views/imports/import_status.py b/bookwyrm/views/imports/import_status.py index 87adbb4549..cdc387ab28 100644 --- a/bookwyrm/views/imports/import_status.py +++ b/bookwyrm/views/imports/import_status.py @@ -83,3 +83,82 @@ def stop_import(request, job_id): job = get_object_or_404(models.ImportJob, id=job_id, user=request.user) job.stop_job() return redirect("import-status", job_id) + + +# pylint: disable= no-self-use +@method_decorator(login_required, name="dispatch") +class UserImportStatus(View): + """status of an existing import""" + + def get(self, request, job_id): + """status of an import job""" + job = get_object_or_404(models.BookwyrmImportJob, id=job_id) + if job.user != request.user: + raise PermissionDenied() + + jobs = job.book_tasks.all().order_by("created_date") + item_count = job.item_count or 1 + + paginated = Paginator(jobs, PAGE_LENGTH) + page = paginated.get_page(request.GET.get("page")) + + book_jobs_count = job.book_tasks.count() or "(pending...)" + failed_books_count = job.book_tasks.filter(status="failed").count() or 0 + if job.complete and not job.book_tasks.count(): + book_jobs_count = 0 + + status_jobs_count = job.status_tasks.count() or "(ending...)" + if job.complete and not job.status_tasks.count(): + status_jobs_count = 0 + failed_statuses_count = job.status_tasks.filter(status="failed").count() or 0 + + relationship_jobs_count = job.relationship_tasks.count() or "(pending...)" + if job.complete and not job.relationship_tasks.count(): + relationship_jobs_count = 0 + failed_relationships_count = ( + job.relationship_tasks.filter(status="failed").count() or 0 + ) + + pending_item_count = job.pending_item_count + + data = { + "job": job, + "items": page, + "book_jobs_count": book_jobs_count, + "status_jobs_count": status_jobs_count, + "relationship_jobs_count": relationship_jobs_count, + "failed_books_count": failed_books_count, + "failed_statuses_count": failed_statuses_count, + "failed_relationships_count": failed_relationships_count, + "page_range": paginated.get_elided_page_range( + page.number, on_each_side=2, on_ends=1 + ), + "show_progress": True, + "item_count": item_count, + "complete_count": item_count - pending_item_count, + "percent": job.percent_complete, + # hours since last import item update + "inactive_time": (job.updated_date - timezone.now()).seconds / 60 / 60, + } + + return TemplateResponse(request, "import/user_import_status.html", data) + + +@login_required +@require_POST +def user_stop_import(request, job_id): + """scrap that""" + job = get_object_or_404(models.BookwyrmImportJob, id=job_id, user=request.user) + job.stop_job() + return redirect("user-import-status", job_id) + + +@login_required +@require_POST +def user_retry_item(request, job_id, item_id): + """retry an item from a user import""" + item = get_object_or_404( + models.UserImportBook, id=item_id, job__id=job_id, job__user=request.user + ) + import_book_task.delay(child_id=item_id, job_id=job_id) + return redirect("user-import-status", job_id) From 215686cecb4ee113f53781c6c536dd7f5d8d1db3 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Mon, 2 Sep 2024 08:22:23 +1000 Subject: [PATCH 05/16] add user import retry This also updated a template tag to be more flexible --- bookwyrm/importers/bookwyrm_import.py | 14 ++- bookwyrm/models/__init__.py | 7 +- bookwyrm/models/bookwyrm_export_job.py | 5 +- bookwyrm/models/bookwyrm_import_job.py | 80 +++++++++-------- bookwyrm/templates/import/import_user.html | 4 +- .../templates/import/user_import_status.html | 78 ++++++++-------- .../templates/import/user_troubleshoot.html | 90 +++++++++++++++++++ bookwyrm/templatetags/utilities.py | 6 +- bookwyrm/urls.py | 7 +- bookwyrm/views/__init__.py | 7 +- bookwyrm/views/imports/import_data.py | 36 +++++--- bookwyrm/views/imports/import_status.py | 45 +++++----- bookwyrm/views/imports/user_troubleshoot.py | 50 +++++++++++ 13 files changed, 305 insertions(+), 124 deletions(-) create mode 100644 bookwyrm/templates/import/user_troubleshoot.html create mode 100644 bookwyrm/views/imports/user_troubleshoot.py diff --git a/bookwyrm/importers/bookwyrm_import.py b/bookwyrm/importers/bookwyrm_import.py index f57ce87980..d4fedb4f79 100644 --- a/bookwyrm/importers/bookwyrm_import.py +++ b/bookwyrm/importers/bookwyrm_import.py @@ -23,7 +23,19 @@ def process_import( user=user, archive_file=archive_file, required=required ) - # TODO need to read the tarfile here and create a childjob for each book + return job + + def create_retry_job( + self, user: User, original_job: BookwyrmImportJob + ) -> BookwyrmImportJob: + """retry items that didn't import""" + + job = BookwyrmImportJob.objects.create( + user=user, + archive_file=original_job.archive_file, + required=original_job.required, + retry=True, + ) return job diff --git a/bookwyrm/models/__init__.py b/bookwyrm/models/__init__.py index 5d197661f6..4e024d3228 100644 --- a/bookwyrm/models/__init__.py +++ b/bookwyrm/models/__init__.py @@ -26,7 +26,12 @@ from .group import Group, GroupMember, GroupMemberInvitation from .import_job import ImportJob, ImportItem -from .bookwyrm_import_job import BookwyrmImportJob, UserImportBook, UserImportPost +from .bookwyrm_import_job import ( + BookwyrmImportJob, + UserImportBook, + UserImportPost, + import_book_task, +) from .bookwyrm_export_job import BookwyrmExportJob from .move import MoveUser diff --git a/bookwyrm/models/bookwyrm_export_job.py b/bookwyrm/models/bookwyrm_export_job.py index a42562f30a..c0e63cc45f 100644 --- a/bookwyrm/models/bookwyrm_export_job.py +++ b/bookwyrm/models/bookwyrm_export_job.py @@ -42,7 +42,10 @@ class BookwyrmExportJob(ParentJob): export_data = FileField(null=True, storage=select_exports_storage) export_json = JSONField(null=True, encoder=DjangoJSONEncoder) - json_completed = BooleanField(default=False) + + """ + TODO: make the _tasks actual subjobs! + """ def start_job(self): """schedule the first task""" diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index 61c2548939..b659d803d8 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -1,19 +1,21 @@ """Import a user from another Bookwyrm instance""" +# TODO: tests + import json import logging import math from django.db.models import ( + BooleanField, ForeignKey, FileField, JSONField, + TextField, TextChoices, - CASCADE, PROTECT, SET_NULL, ) -from django.db.utils import IntegrityError from django.utils import timezone from django.utils.html import strip_tags from django.utils.translation import gettext_lazy as _ @@ -36,6 +38,7 @@ class BookwyrmImportJob(ParentJob): required = DjangoArrayField( models.fields.CharField(max_length=50, blank=True), blank=True ) + retry = BooleanField(default=False) def start_job(self): """Start the job""" @@ -94,6 +97,7 @@ class UserImportBook(ChildJob): book = ForeignKey(models.Book, on_delete=SET_NULL, null=True, blank=True) book_data = JSONField(null=False) + fail_reason = TextField(null=True) def start_job(self): """Start the job""" @@ -117,10 +121,11 @@ class StatusType(TextChoices): status_type = models.fields.CharField( max_length=10, choices=StatusType.choices, default=StatusType.COMMENT, null=True ) + fail_reason = TextField(null=True) def start_job(self): """Start the job""" - upsert_statuses_task.delay(child_id=self.id) + upsert_status_task.delay(child_id=self.id) class UserRelationshipImport(ChildJob): @@ -136,6 +141,7 @@ class RelationshipType(TextChoices): max_length=10, choices=RelationshipType.choices, null=True ) remote_id = models.fields.RemoteIdField(null=True, unique=False) + fail_reason = TextField(null=True) def start_job(self): """Start the job""" @@ -150,8 +156,11 @@ def start_import_task(**kwargs): job = BookwyrmImportJob.objects.get(id=kwargs["job_id"]) archive_file = job.bookwyrmimportjob.archive_file - # don't start the job if it was stopped from the UI - if job.complete: + # don't run if user is not allowed imports yet + if user_import_available(user=job.user): + return + + if job.status == "stopped": return job.status = "active" @@ -194,7 +203,6 @@ def start_import_task(**kwargs): book_job.start_job() archive_file.close() - # job.complete_job() except Exception as err: # pylint: disable=broad-except logger.exception("User Import Job %s Failed with error: %s", job.id, err) @@ -202,7 +210,7 @@ def start_import_task(**kwargs): @app.task(queue=IMPORTS, base=SubTask) -def import_book_task(**kwargs): +def import_book_task(**kwargs): # pylint: disable=too-many-locals,too-many-branches """Take work and edition data, find or create the edition and work in the database""" @@ -211,8 +219,7 @@ def import_book_task(**kwargs): archive_file = job.bookwyrmimportjob.archive_file book_data = task.book_data - # don't start the job if it was stopped from the UI - if job.complete or task.complete: + if task.complete or job.status == "stopped": return try: @@ -227,7 +234,7 @@ def import_book_task(**kwargs): instance = parsed_author.to_model( model=models.Author, save=True, - overwrite=True, # TODO: why do we use overwrite? + overwrite=True, # TODO: why do we use overwrite? (check with test) ) edition["authors"].append(instance.remote_id) @@ -255,8 +262,6 @@ def import_book_task(**kwargs): ) # set the cover image from the tar - # NOTE we don't have the images to go with test json! - # TODO: test this later if cover_path: archive_file.open("rb") with BookwyrmTarFile.open(mode="r:gz", fileobj=archive_file) as tar: @@ -277,11 +282,13 @@ def import_book_task(**kwargs): if "include_lists" in required: upsert_lists(task_user, book.id, book_data.get("lists")) - except Exception as err: + except Exception as err: # pylint: disable=broad-except logger.exception( "Book Import Task %s for Job %s Failed with error: %s", task.id, job.id, err ) - job.set_status("failed") + task.fail_reason = _("unknown") + task.save(update_fields=["fail_reason"]) + task.set_status("failed") # Now import statuses # These are also subtasks so that we can isolate anything that fails @@ -319,7 +326,7 @@ def import_book_task(**kwargs): @app.task(queue=IMPORTS, base=SubTask) -def upsert_statuses_task(**kwargs): +def upsert_status_task(**kwargs): """Find or create book statuses""" task = UserImportPost.objects.get(id=kwargs["child_id"]) @@ -334,13 +341,11 @@ def upsert_statuses_task(**kwargs): else models.Comment ) - # don't start the job if it was stopped from the UI - if job.complete or task.complete: + if task.complete or job.status == "stopped": return try: # only add statuses if this is the same user - logger.info("attributedTo: %s ", status.get("attributedTo", False)) if is_alias(user, status.get("attributedTo", False)): status["attributedTo"] = user.remote_id status["to"] = update_followers_address(user, status["to"]) @@ -377,12 +382,16 @@ def upsert_statuses_task(**kwargs): else: logger.warning( - "User does not have permission to import statuses, or status is tombstone" + "User not authorized to import statuses, or status is tombstone" ) + task.fail_reason = _("unauthorized") + task.save(update_fields=["fail_reason"]) task.set_status("failed") - except Exception as err: - logger.exception("User Import Job %s Failed with error: %s", task.id, err) + except Exception as err: # pylint: disable=broad-except + logger.exception("User Import Task %s Failed with error: %s", task.id, err) + task.fail_reason = _("unknown") + task.save(update_fields=["fail_reason"]) task.set_status("failed") @@ -409,8 +418,6 @@ def upsert_readthroughs(user, book_id, data): if not existing: models.ReadThrough.objects.create(**obj) - return - def upsert_lists( user, @@ -451,8 +458,6 @@ def upsert_lists( order=count + 1, ) - return - def upsert_shelves(user, book, shelves): """Take shelf JSON objects and create @@ -473,8 +478,6 @@ def upsert_shelves(user, book, shelves): book=book, shelf=book_shelf, user=user, shelved_date=timezone.now() ) - return - # user updates ############## @@ -577,9 +580,11 @@ def import_user_relationship_task(**kwargs): logger.exception( "Could not resolve user %s task %s", task.remote_id, task.id ) + task.fail_reason = _("connection_error") + task.save(update_fields=["fail_reason"]) task.set_status("failed") - elif tasks.relationship == "block": + elif task.relationship == "block": user_object = activitypub.resolve_remote_id(task.remote_id, models.User) if user_object: @@ -601,6 +606,8 @@ def import_user_relationship_task(**kwargs): logger.exception( "Could not resolve user %s task %s", task.remote_id, task.id ) + task.fail_reason = _("connection_error") + task.save(update_fields=["fail_reason"]) task.set_status("failed") else: @@ -609,19 +616,14 @@ def import_user_relationship_task(**kwargs): task.relationship, task.id, ) + task.fail_reason = _("invalid_relationship") + task.save(update_fields=["fail_reason"]) task.set_status("failed") - except IntegrityError as err: - # `null value in column "to_user_id" of relation "bookwyrm_user_also_known_as" violates not-null constraint` - # TODO: this seems to indicate that the *alias* doesn't have an ID, which will always be the case - # if we don't have them in our DB already? - # seems like a bug in activitypub.resolve_remote_id? We need to import BOTH users - logger.exception( - "User Import Job %s experienced an IntegrityError: %s", task.id, err - ) - task.set_status("failed") - except Exception as err: - logger.exception("User Import Job %s Failed with error: %s", task.id, err) + except Exception as err: # pylint: disable=broad-except + logger.exception("User Import Task %s Failed with error: %s", task.id, err) + task.fail_reason = _("unknown") + task.save(update_fields=["fail_reason"]) task.set_status("failed") diff --git a/bookwyrm/templates/import/import_user.html b/bookwyrm/templates/import/import_user.html index 9009724166..0f4bf4f5df 100644 --- a/bookwyrm/templates/import/import_user.html +++ b/bookwyrm/templates/import/import_user.html @@ -29,8 +29,8 @@ {% elif next_available %}
-

{% blocktrans %}Currently you are allowed to import one user every {{ user_import_hours }} hours.{% endblocktrans %}

-

{% blocktrans %}You will next be able to import a user file at {{ next_available }}{% endblocktrans %}

+

{% blocktrans with hours=next_available.1 %}Currently you are allowed to import one user every {{ hours }} hours.{% endblocktrans %}

+

{% blocktrans with next_time=next_available.0 %}You will next be able to import a user file at {{ next_time }}{% endblocktrans %}

{% else %}
diff --git a/bookwyrm/templates/import/user_import_status.html b/bookwyrm/templates/import/user_import_status.html index 4f8cbf5629..6f5cf21166 100644 --- a/bookwyrm/templates/import/user_import_status.html +++ b/bookwyrm/templates/import/user_import_status.html @@ -10,7 +10,7 @@

{% block page_title %} {% if job.retry %} - {% trans "Retry Status" %} + {% trans "User Import Retry Status" %} {% else %} {% trans "User Import Status" %} {% endif %} @@ -23,11 +23,7 @@

{% url 'user-import-status' job.id as path %} - {% if job.retry %} - {% trans "Retry Status" %} - {% else %} - {% trans "Import Status" %} - {% endif %} + {% trans "User Import Status" %} {% block breadcrumbs %}{% endblock %} @@ -36,9 +32,9 @@

-
{% trans "Import started:" %}
+
{% trans "Import started:" %}
{{ job.created_date | naturaltime }}
-
Import Job Status:
+
Import Job Status:
{% endif %}
-
{% trans "Books" %}:
-
- {% blocktrans %}total {{ book_jobs_count }}, failed {{ failed_books_count }}{% endblocktrans %} -
-
{% trans "Statuses" %}:
-
- {% blocktrans %}total {{ status_jobs_count }}, failed {{ failed_statuses_count }}{% endblocktrans %} -
-
{% trans "Follows & Blocks" %}:
-
- {% blocktrans %}total {{ relationship_jobs_count }}, failed {{ failed_relationships_count }}{% endblocktrans %} -
+ {% block import_counts %} +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + +
{% trans "Imported" %}{% trans "Failed" %}{% trans "Total" %}
{% trans "Books" %}{{ completed_books_count }}{{ failed_books_count }}{{ book_jobs_count }}
{% trans "Statuses" %}{{ completed_statuses_count }}{{ failed_statuses_count }}{{ status_jobs_count }}
{% trans "Follows & Blocks" %}{{ completed_relationships_count }}{{ failed_relationships_count }}{{ relationship_jobs_count }}
+
+
+ {% endblock %} {% if job.status == "active" and show_progress %}
@@ -108,14 +124,14 @@

{% endif %} - {% if job.complete and fail_count and not job.retry and not legacy %} + {% if job.complete and fail_count and not job.retry %}
{% blocktrans trimmed count counter=fail_count with display_counter=fail_count|intcomma %} {{ display_counter }} item failed to import. {% plural %} {{ display_counter }} items failed to import. {% endblocktrans %} - + {% trans "View and troubleshoot failed items" %}
@@ -124,6 +140,8 @@

{% block actions %}{% endblock %} + {% block item_list %} +

{% trans "Imported books" %}

@@ -145,13 +163,6 @@

{% endblock %}

- {% if not items %} - - - - {% else %} {% for item in items %} @@ -195,9 +199,9 @@

{% block action_row %}{% endblock %} {% endfor %} - {% endif %}
- {% trans "No items currently need review" %} -
@@ -181,13 +192,6 @@

{% else %}
{{ item.status }} - {# retry option if an item appears to be hanging #} - {% if job.created_date != job.updated_date and inactive_time > 24 %} -
- {% csrf_token %} - -
- {% endif %}
{% endif %}

+ {% endblock %}
diff --git a/bookwyrm/templates/import/user_troubleshoot.html b/bookwyrm/templates/import/user_troubleshoot.html new file mode 100644 index 0000000000..85c2504afa --- /dev/null +++ b/bookwyrm/templates/import/user_troubleshoot.html @@ -0,0 +1,90 @@ +{% extends 'import/user_import_status.html' %} +{% load i18n %} +{% load utilities %} + +{% block title %}{% trans "User Import Troubleshooting" %}{% endblock %} + +{% block page_title %} +{% trans "Failed items" %} +{% endblock %} + +{% block breadcrumbs %} +
  • + {% trans "Troubleshooting" %} +
  • +{% endblock %} + +{% block import_counts %}{% endblock %} + +{% block actions %} +
    +
    +

    + {% trans "Re-trying an import can fix missing items in cases such as:" %} +

    +
      +
    • {% trans "Your account was not set as an alias of the original user account" %}
    • +
    • {% trans "A transient error or timeout caused the external data source to be unavailable." %}
    • +
    • {% trans "BookWyrm has been updated since this import with a bug fix" %}
    • +
    +

    + {% trans "Re-trying an import will not work in cases such as:" %} +

    +
      +
    • {% trans "A user, status, or BookWyrm server was deleted after your import file was created" %}
    • +
    • {% trans "Importing statuses when your old account has been deleted" %}
    • +
    +

    + {% trans "Contact your admin or open an issue if you are seeing unexpected failed items." %} +

    +
    + {% if next_available %} +
    +

    {% blocktrans with hours=next_available.1 %}Currently you are allowed to import or retry one user every {{ hours }} hours.{% endblocktrans %}

    +

    {% blocktrans with next_time=next_available.0 %}You will be able to retry this import at {{ next_time }}{% endblocktrans %}

    +
    + {% else %} +
    + {% csrf_token %} + +
    + {% endif %} +
    +{% endblock %} +{% block item_list %} +
    + + + + + + + + {% for item in items %} + + + + + + + {% endfor %} +
    + {% trans "Book" %} + + {% trans "Status" %} + + {% trans "Relationship" %} + + {% trans "Reason" %} +
    {{ item.userimportpost.book.title }}{{ item.userimportpost.json.name }}{% id_to_username item.userrelationshipimport.remote_id True %} + {% if item.fail_reason == "unauthorized" %} + Not authorized to import statuses + {% elif item.fail_reason == "connection_error" %} + Could not connect to remote identity + {% elif item.fail_reason == "invalid_relationship" %} + Invalid relationship type - please log an issue + {% else %} + Unknown error + {% endif %} +
    +{% endblock %} \ No newline at end of file diff --git a/bookwyrm/templatetags/utilities.py b/bookwyrm/templatetags/utilities.py index ab597a22a2..ce8b6d16f3 100644 --- a/bookwyrm/templatetags/utilities.py +++ b/bookwyrm/templatetags/utilities.py @@ -116,7 +116,7 @@ def get_isni(existing, author, autoescape=True): @register.simple_tag(takes_context=False) -def id_to_username(user_id): +def id_to_username(user_id, return_empty=False): """given an arbitrary remote id, return the username""" if user_id: url = urlparse(user_id) @@ -126,6 +126,10 @@ def id_to_username(user_id): value = f"{name}@{domain}" return value + + if return_empty: + return "" + return _("a new user account") diff --git a/bookwyrm/urls.py b/bookwyrm/urls.py index 8010bd0932..6271e6c92b 100644 --- a/bookwyrm/urls.py +++ b/bookwyrm/urls.py @@ -451,7 +451,7 @@ ), re_path( r"^user-import/(?P\d+)/stop/?$", - views.user_stop_import, + views.stop_user_import, name="user-import-stop", ), re_path( @@ -464,6 +464,11 @@ views.ImportTroubleshoot.as_view(), name="import-troubleshoot", ), + re_path( + r"^user-import/(?P\d+)/failed/?$", + views.UserImportTroubleshoot.as_view(), + name="user-import-troubleshoot", + ), re_path( r"^import/(?P\d+)/review/?$", views.ImportManualReview.as_view(), diff --git a/bookwyrm/views/__init__.py b/bookwyrm/views/__init__.py index c7730f7464..1e72007d62 100644 --- a/bookwyrm/views/__init__.py +++ b/bookwyrm/views/__init__.py @@ -85,16 +85,17 @@ from .shelf.shelf_actions import create_shelf, delete_shelf from .shelf.shelf_actions import shelve, unshelve -# csv import -from .imports.import_data import Import, UserImport +# csv and user import +from .imports.import_data import Import, UserImport, user_import_available from .imports.import_status import ( ImportStatus, UserImportStatus, retry_item, stop_import, - user_stop_import, + stop_user_import, ) from .imports.troubleshoot import ImportTroubleshoot +from .imports.user_troubleshoot import UserImportTroubleshoot from .imports.manually_review import ( ImportManualReview, approve_import_item, diff --git a/bookwyrm/views/imports/import_data.py b/bookwyrm/views/imports/import_data.py index 59686c1f0a..8ae7a0f0d2 100644 --- a/bookwyrm/views/imports/import_data.py +++ b/bookwyrm/views/imports/import_data.py @@ -1,6 +1,7 @@ """ import books from another app """ from io import TextIOWrapper import datetime +from typing import Optional from django.contrib.auth.decorators import login_required from django.db.models import Avg, ExpressionWrapper, F, fields @@ -149,25 +150,12 @@ def get(self, request, invalid=False): jobs = BookwyrmImportJob.objects.filter(user=request.user).order_by( "-created_date" ) - site = models.SiteSettings.objects.get() - hours = site.user_import_time_limit - allowed = ( - jobs.first().created_date < timezone.now() - datetime.timedelta(hours=hours) - if jobs.first() - else True - ) - next_available = ( - jobs.first().created_date + datetime.timedelta(hours=hours) - if not allowed - else False - ) paginated = Paginator(jobs, PAGE_LENGTH) page = paginated.get_page(request.GET.get("page")) data = { "import_form": forms.ImportUserForm(), "jobs": page, - "user_import_hours": hours, - "next_available": next_available, + "next_available": user_import_available(user=request.user), "page_range": paginated.get_elided_page_range( page.number, on_each_side=2, on_ends=1 ), @@ -179,6 +167,10 @@ def get(self, request, invalid=False): def post(self, request): """ingest a Bookwyrm json file""" + site = models.SiteSettings.objects.get() + if not site.imports_enabled: + raise PermissionDenied() + importer = BookwyrmImporter() form = forms.ImportUserForm(request.POST, request.FILES) @@ -194,3 +186,19 @@ def post(self, request): job.start_job() return redirect("user-import") + + +def user_import_available(user: models.User) -> Optional[tuple[datetime, int]]: + + jobs = BookwyrmImportJob.objects.filter(user=user).order_by("-created_date") + site = models.SiteSettings.objects.get() + hours = site.user_import_time_limit + allowed = ( + jobs.first().created_date < timezone.now() - datetime.timedelta(hours=hours) + if jobs.first() + else True + ) + if allowed and site.imports_enabled: + return + + return (jobs.first().created_date + datetime.timedelta(hours=hours), hours) diff --git a/bookwyrm/views/imports/import_status.py b/bookwyrm/views/imports/import_status.py index cdc387ab28..0201a17741 100644 --- a/bookwyrm/views/imports/import_status.py +++ b/bookwyrm/views/imports/import_status.py @@ -103,39 +103,47 @@ def get(self, request, job_id): page = paginated.get_page(request.GET.get("page")) book_jobs_count = job.book_tasks.count() or "(pending...)" - failed_books_count = job.book_tasks.filter(status="failed").count() or 0 if job.complete and not job.book_tasks.count(): book_jobs_count = 0 - status_jobs_count = job.status_tasks.count() or "(ending...)" + status_jobs_count = job.status_tasks.count() or "(pending...)" if job.complete and not job.status_tasks.count(): status_jobs_count = 0 - failed_statuses_count = job.status_tasks.filter(status="failed").count() or 0 relationship_jobs_count = job.relationship_tasks.count() or "(pending...)" if job.complete and not job.relationship_tasks.count(): relationship_jobs_count = 0 - failed_relationships_count = ( - job.relationship_tasks.filter(status="failed").count() or 0 - ) - - pending_item_count = job.pending_item_count data = { "job": job, "items": page, + "completed_books_count": job.book_tasks.filter(status="complete").count() + or 0, + "completed_statuses_count": job.status_tasks.filter( + status="complete" + ).count() + or 0, + "completed_relationships_count": job.relationship_tasks.filter( + status="complete" + ).count() + or 0, + "failed_books_count": job.book_tasks.filter(status="failed").count() or 0, + "failed_statuses_count": job.status_tasks.filter(status="failed").count() + or 0, + "failed_relationships_count": job.relationship_tasks.filter( + status="failed" + ).count() + or 0, + "fail_count": job.child_jobs.filter(status="failed").count(), "book_jobs_count": book_jobs_count, "status_jobs_count": status_jobs_count, "relationship_jobs_count": relationship_jobs_count, - "failed_books_count": failed_books_count, - "failed_statuses_count": failed_statuses_count, - "failed_relationships_count": failed_relationships_count, "page_range": paginated.get_elided_page_range( page.number, on_each_side=2, on_ends=1 ), "show_progress": True, "item_count": item_count, - "complete_count": item_count - pending_item_count, + "complete_count": item_count - job.pending_item_count, "percent": job.percent_complete, # hours since last import item update "inactive_time": (job.updated_date - timezone.now()).seconds / 60 / 60, @@ -146,19 +154,8 @@ def get(self, request, job_id): @login_required @require_POST -def user_stop_import(request, job_id): +def stop_user_import(request, job_id): """scrap that""" job = get_object_or_404(models.BookwyrmImportJob, id=job_id, user=request.user) job.stop_job() return redirect("user-import-status", job_id) - - -@login_required -@require_POST -def user_retry_item(request, job_id, item_id): - """retry an item from a user import""" - item = get_object_or_404( - models.UserImportBook, id=item_id, job__id=job_id, job__user=request.user - ) - import_book_task.delay(child_id=item_id, job_id=job_id) - return redirect("user-import-status", job_id) diff --git a/bookwyrm/views/imports/user_troubleshoot.py b/bookwyrm/views/imports/user_troubleshoot.py new file mode 100644 index 0000000000..5b06f8fe25 --- /dev/null +++ b/bookwyrm/views/imports/user_troubleshoot.py @@ -0,0 +1,50 @@ +""" import books from another app """ +from django.contrib.auth.decorators import login_required +from django.core.exceptions import PermissionDenied +from django.core.paginator import Paginator +from django.shortcuts import get_object_or_404, redirect +from django.template.response import TemplateResponse +from django.utils.decorators import method_decorator +from django.urls import reverse +from django.views import View + +from bookwyrm import models +from bookwyrm.importers import Importer, BookwyrmImporter +from bookwyrm.views import user_import_available +from bookwyrm.settings import PAGE_LENGTH + +# pylint: disable= no-self-use +@method_decorator(login_required, name="dispatch") +class UserImportTroubleshoot(View): + """failed items in an existing user import""" + + def get(self, request, job_id): + """status of an import job""" + job = get_object_or_404(models.BookwyrmImportJob, id=job_id) + if job.user != request.user: + raise PermissionDenied() + + items = job.child_jobs.order_by("task_id").filter(status="failed") + paginated = Paginator(items, PAGE_LENGTH) + page = paginated.get_page(request.GET.get("page")) + data = { + "next_available": user_import_available(user=request.user), + "job": job, + "items": page, + "page_range": paginated.get_elided_page_range( + page.number, on_each_side=2, on_ends=1 + ), + "complete": True, + "page_path": reverse("user-import-troubleshoot", args=[job.id]), + } + + return TemplateResponse(request, "import/user_troubleshoot.html", data) + + def post(self, request, job_id): + """retry lines from a user import""" + job = get_object_or_404(models.BookwyrmImportJob, id=job_id) + + importer = BookwyrmImporter() + job = importer.create_retry_job(request.user, job) + job.start_job() + return redirect(f"/user-import/{job.id}") From 4388eb5a45be2cdd3414ec1b6635245db3dcf0b6 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Mon, 2 Sep 2024 13:21:32 +1000 Subject: [PATCH 06/16] update tests and clean up user export --- bookwyrm/importers/bookwyrm_import.py | 1 + ...> 0210_userrelationshipimport_and_more.py} | 16 +- bookwyrm/models/bookwyrm_export_job.py | 71 +++----- bookwyrm/models/bookwyrm_import_job.py | 30 +-- .../importers/test_bookwyrm_user_import.py | 46 +++++ .../tests/models/test_bookwyrm_import_job.py | 171 ++++++++++-------- bookwyrm/views/imports/import_data.py | 5 +- bookwyrm/views/imports/user_troubleshoot.py | 2 +- 8 files changed, 197 insertions(+), 145 deletions(-) rename bookwyrm/migrations/{0208_userrelationshipimport_and_more.py => 0210_userrelationshipimport_and_more.py} (88%) create mode 100644 bookwyrm/tests/importers/test_bookwyrm_user_import.py diff --git a/bookwyrm/importers/bookwyrm_import.py b/bookwyrm/importers/bookwyrm_import.py index d4fedb4f79..7fcb7109e1 100644 --- a/bookwyrm/importers/bookwyrm_import.py +++ b/bookwyrm/importers/bookwyrm_import.py @@ -25,6 +25,7 @@ def process_import( return job + # TODO: need a test for this (in a new file) def create_retry_job( self, user: User, original_job: BookwyrmImportJob ) -> BookwyrmImportJob: diff --git a/bookwyrm/migrations/0208_userrelationshipimport_and_more.py b/bookwyrm/migrations/0210_userrelationshipimport_and_more.py similarity index 88% rename from bookwyrm/migrations/0208_userrelationshipimport_and_more.py rename to bookwyrm/migrations/0210_userrelationshipimport_and_more.py index 03a8aa42bd..e47ad52536 100644 --- a/bookwyrm/migrations/0208_userrelationshipimport_and_more.py +++ b/bookwyrm/migrations/0210_userrelationshipimport_and_more.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.15 on 2024-08-26 10:16 +# Generated by Django 4.2.15 on 2024-09-03 11:22 import bookwyrm.models.fields import django.contrib.postgres.fields @@ -9,7 +9,7 @@ class Migration(migrations.Migration): dependencies = [ - ("bookwyrm", "0207_sqlparse_update"), + ("bookwyrm", "0209_user_show_ratings"), ] operations = [ @@ -43,12 +43,22 @@ class Migration(migrations.Migration): validators=[bookwyrm.models.fields.validate_remote_id], ), ), + ("fail_reason", models.TextField(null=True)), ], options={ "abstract": False, }, bases=("bookwyrm.childjob",), ), + migrations.RemoveField( + model_name="bookwyrmexportjob", + name="json_completed", + ), + migrations.AddField( + model_name="bookwyrmimportjob", + name="retry", + field=models.BooleanField(default=False), + ), migrations.AlterField( model_name="bookwyrmimportjob", name="required", @@ -86,6 +96,7 @@ class Migration(migrations.Migration): null=True, ), ), + ("fail_reason", models.TextField(null=True)), ( "book", bookwyrm.models.fields.ForeignKey( @@ -114,6 +125,7 @@ class Migration(migrations.Migration): ), ), ("book_data", models.JSONField()), + ("fail_reason", models.TextField(null=True)), ( "book", models.ForeignKey( diff --git a/bookwyrm/models/bookwyrm_export_job.py b/bookwyrm/models/bookwyrm_export_job.py index c0e63cc45f..95ede303f8 100644 --- a/bookwyrm/models/bookwyrm_export_job.py +++ b/bookwyrm/models/bookwyrm_export_job.py @@ -6,7 +6,7 @@ from boto3.session import Session as BotoSession from s3_tar import S3Tar -from django.db.models import BooleanField, FileField, JSONField +from django.db.models import FileField, JSONField from django.core.serializers.json import DjangoJSONEncoder from django.core.files.base import ContentFile from django.core.files.storage import storages @@ -17,7 +17,7 @@ from bookwyrm.models import Review, Comment, Quotation from bookwyrm.models import Edition from bookwyrm.models import UserFollows, User, UserBlocks -from bookwyrm.models.job import ParentJob +from bookwyrm.models.job import ParentJob, ParentTask from bookwyrm.tasks import app, IMPORTS from bookwyrm.utils.tar import BookwyrmTarFile @@ -43,40 +43,40 @@ class BookwyrmExportJob(ParentJob): export_data = FileField(null=True, storage=select_exports_storage) export_json = JSONField(null=True, encoder=DjangoJSONEncoder) - """ - TODO: make the _tasks actual subjobs! - """ - def start_job(self): """schedule the first task""" - task = create_export_json_task.delay(job_id=self.id) - self.task_id = task.id - self.save(update_fields=["task_id"]) + self.set_status("active") + create_export_json_task.delay(job_id=self.id) -@app.task(queue=IMPORTS) -def create_export_json_task(job_id): +@app.task(queue=IMPORTS, base=ParentTask) +def create_export_json_task(**kwargs): """create the JSON data for the export""" - job = BookwyrmExportJob.objects.get(id=job_id) - + job = BookwyrmExportJob.objects.get(id=kwargs["job_id"]) # don't start the job if it was stopped from the UI - if job.complete: + if job.status == "stopped": return try: - job.set_status("active") - - # generate JSON structure - job.export_json = export_json(job.user) + # generate JSON + data = export_user(job.user) + data["settings"] = export_settings(job.user) + data["goals"] = export_goals(job.user) + data["books"] = export_books(job.user) + data["saved_lists"] = export_saved_lists(job.user) + data["follows"] = export_follows(job.user) + data["blocks"] = export_blocks(job.user) + job.export_json = data job.save(update_fields=["export_json"]) - # create archive in separate task + # trigger task to create tar file create_archive_task.delay(job_id=job.id) + except Exception as err: # pylint: disable=broad-except logger.exception( - "create_export_json_task for %s failed with error: %s", job, err + "create_export_json_task for job %s failed with error: %s", job.id, err ) job.set_status("failed") @@ -97,21 +97,20 @@ def add_file_to_s3_tar(s3_tar: S3Tar, storage, file, directory=""): ) -@app.task(queue=IMPORTS) -def create_archive_task(job_id): +@app.task(queue=IMPORTS, base=ParentTask) +def create_archive_task(**kwargs): """create the archive containing the JSON file and additional files""" - job = BookwyrmExportJob.objects.get(id=job_id) + job = BookwyrmExportJob.objects.get(id=kwargs["job_id"]) # don't start the job if it was stopped from the UI - if job.complete: + if job.status == "stopped": return try: export_task_id = str(job.task_id) archive_filename = f"{export_task_id}.tar.gz" export_json_bytes = DjangoJSONEncoder().encode(job.export_json).encode("utf-8") - user = job.user editions = get_books_for_user(user) @@ -172,25 +171,15 @@ def create_archive_task(job_id): tar.add_image(edition.cover, directory="images") job.save(update_fields=["export_data"]) - job.set_status("completed") + job.complete_job() except Exception as err: # pylint: disable=broad-except - logger.exception("create_archive_task for %s failed with error: %s", job, err) + logger.exception( + "create_archive_task for job %s failed with error: %s", job.id, err + ) job.set_status("failed") -def export_json(user: User): - """create export JSON""" - data = export_user(user) # in the root of the JSON structure - data["settings"] = export_settings(user) - data["goals"] = export_goals(user) - data["books"] = export_books(user) - data["saved_lists"] = export_saved_lists(user) - data["follows"] = export_follows(user) - data["blocks"] = export_blocks(user) - return data - - def export_user(user: User): """export user data""" data = user.to_activity() @@ -319,11 +308,9 @@ def export_book(user: User, edition: Edition): def get_books_for_user(user): """ Get all the books and editions related to a user. - We use union() instead of Q objects because it creates - multiple simple queries in stead of a much more complex DB query + multiple simple queries instead of a complex DB query that can time out. - """ shelf_eds = Edition.objects.select_related("parent_work").filter(shelves__user=user) diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index b659d803d8..d184c6afda 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -1,7 +1,5 @@ """Import a user from another Bookwyrm instance""" -# TODO: tests - import json import logging import math @@ -156,10 +154,6 @@ def start_import_task(**kwargs): job = BookwyrmImportJob.objects.get(id=kwargs["job_id"]) archive_file = job.bookwyrmimportjob.archive_file - # don't run if user is not allowed imports yet - if user_import_available(user=job.user): - return - if job.status == "stopped": return @@ -182,13 +176,11 @@ def start_import_task(**kwargs): update_goals(job.user, job.import_data.get("goals", [])) if "include_saved_lists" in job.required: upsert_saved_lists(job.user, job.import_data.get("saved_lists", [])) - if "include_follows" in job.required: for remote_id in job.import_data.get("follows", []): UserRelationshipImport.objects.create( parent_job=job, remote_id=remote_id, relationship="follow" ) - if "include_blocks" in job.required: for remote_id in job.import_data.get("blocks", []): UserRelationshipImport.objects.create( @@ -230,8 +222,7 @@ def import_book_task(**kwargs): # pylint: disable=too-many-locals,too-many-bran # replace the old author ids in the edition JSON edition["authors"] = [] for author in book_data.get("authors"): - parsed_author = activitypub.parse(author) - instance = parsed_author.to_model( + instance = activitypub.parse(author).to_model( model=models.Author, save=True, overwrite=True, # TODO: why do we use overwrite? (check with test) @@ -248,16 +239,14 @@ def import_book_task(**kwargs): # pylint: disable=too-many-locals,too-many-bran # first we need the parent work to exist work = book_data.get("work") work["editions"] = [] - parsed_work = activitypub.parse(work) - work_instance = parsed_work.to_model( + work_instance = activitypub.parse(work).to_model( model=models.Work, save=True, overwrite=True ) # now we have a work we can add it to the edition # and create the edition model instance edition["work"] = work_instance.remote_id - parsed_edition = activitypub.parse(edition) - book = parsed_edition.to_model( + book = activitypub.parse(edition).to_model( model=models.Edition, save=True, overwrite=True ) @@ -271,16 +260,17 @@ def import_book_task(**kwargs): # pylint: disable=too-many-locals,too-many-bran task.book = book task.save(update_fields=["book"]) required = task.parent_job.bookwyrmimportjob.required - task_user = task.parent_job.user if "include_shelves" in required: - upsert_shelves(task_user, book, book_data.get("shelves")) + upsert_shelves(task.parent_job.user, book, book_data.get("shelves")) if "include_readthroughs" in required: - upsert_readthroughs(task_user, book.id, book_data.get("readthroughs")) + upsert_readthroughs( + task.parent_job.user, book.id, book_data.get("readthroughs") + ) if "include_lists" in required: - upsert_lists(task_user, book.id, book_data.get("lists")) + upsert_lists(task.parent_job.user, book.id, book_data.get("lists")) except Exception as err: # pylint: disable=broad-except logger.exception( @@ -335,9 +325,9 @@ def upsert_status_task(**kwargs): status = task.json status_class = ( models.Review - if task.StatusType.REVIEW + if task.status_type == "review" else models.Quotation - if task.StatusType.QUOTE + if task.status_type == "quote" else models.Comment ) diff --git a/bookwyrm/tests/importers/test_bookwyrm_user_import.py b/bookwyrm/tests/importers/test_bookwyrm_user_import.py new file mode 100644 index 0000000000..72aafd29a4 --- /dev/null +++ b/bookwyrm/tests/importers/test_bookwyrm_user_import.py @@ -0,0 +1,46 @@ +""" testing bookwyrm user import """ +from unittest.mock import patch +from django.test import TestCase +from bookwyrm import models +from bookwyrm.importers import BookwyrmImporter + + +class BookwyrmUserImport(TestCase): + """importing from BookWyrm user import""" + + def setUp(self): + """setting stuff up""" + with ( + patch("bookwyrm.suggested_users.rerank_suggestions_task.delay"), + patch("bookwyrm.activitystreams.populate_stream_task.delay"), + patch("bookwyrm.lists_stream.populate_lists_task.delay"), + patch("bookwyrm.suggested_users.rerank_user_task.delay"), + ): + self.user = models.User.objects.create_user( + "mouse", "mouse@mouse.mouse", "password", local=True, localname="mouse" + ) + + def test_create_retry_job(self): + """test retrying a user import""" + + job = models.bookwyrm_import_job.BookwyrmImportJob.objects.create( + user=self.user, required=[] + ) + + job.complete_job() + self.assertEqual(job.retry, False) + self.assertEqual( + models.bookwyrm_import_job.BookwyrmImportJob.objects.count(), 1 + ) + + # retry the job + importer = BookwyrmImporter() + importer.create_retry_job(user=self.user, original_job=job) + + retry_job = models.bookwyrm_import_job.BookwyrmImportJob.objects.last() + + self.assertEqual( + models.bookwyrm_import_job.BookwyrmImportJob.objects.count(), 2 + ) + self.assertEqual(retry_job.retry, True) + self.assertNotEqual(job.id, retry_job.id) diff --git a/bookwyrm/tests/models/test_bookwyrm_import_job.py b/bookwyrm/tests/models/test_bookwyrm_import_job.py index 7d4bdb5996..e9e6f2600b 100644 --- a/bookwyrm/tests/models/test_bookwyrm_import_job.py +++ b/bookwyrm/tests/models/test_bookwyrm_import_job.py @@ -49,8 +49,9 @@ def setUp(self): "badger", "badger@badger.badger", "password", - local=True, + local=False, localname="badger", + remote_id="badger@remote.remote", ) self.work = models.Work.objects.create(title="Sand Talk") @@ -75,6 +76,10 @@ def setUp(self): "../data/bookwyrm_account_export.tar.gz" ) + self.job = bookwyrm_import_job.BookwyrmImportJob.objects.create( + user=self.local_user, required=[] + ) + def test_update_user_profile(self): """Test update the user's profile from import data""" @@ -195,8 +200,14 @@ def test_upsert_saved_lists_not_existing(self): self.assertTrue(self.local_user.saved_lists.filter(id=book_list.id).exists()) - def test_upsert_follows(self): - """Test take a list of remote ids and add as follows""" + def test_follow_relationship(self): + """Test take a remote ID and create a follow""" + + task = bookwyrm_import_job.UserRelationshipImport.objects.create( + parent_job=self.job, + relationship="follow", + remote_id="https://blah.blah/user/rat", + ) before_follow = models.UserFollows.objects.filter( user_subject=self.local_user, user_object=self.rat_user @@ -208,18 +219,24 @@ def test_upsert_follows(self): patch("bookwyrm.activitystreams.add_user_statuses_task.delay"), patch("bookwyrm.lists_stream.add_user_lists_task.delay"), patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"), + patch("bookwyrm.activitypub.resolve_remote_id", return_value=self.rat_user), ): - models.bookwyrm_import_job.upsert_follows( - self.local_user, self.json_data.get("follows") - ) + + bookwyrm_import_job.import_user_relationship_task(child_id=task.id) after_follow = models.UserFollows.objects.filter( user_subject=self.local_user, user_object=self.rat_user ).exists() self.assertTrue(after_follow) - def test_upsert_user_blocks(self): - """test adding blocked users""" + def test_block_relationship(self): + """test adding blocks for users""" + + task = bookwyrm_import_job.UserRelationshipImport.objects.create( + parent_job=self.job, + relationship="block", + remote_id="https://blah.blah/user/badger", + ) blocked_before = models.UserBlocks.objects.filter( Q( @@ -234,10 +251,11 @@ def test_upsert_user_blocks(self): patch("bookwyrm.activitystreams.remove_user_statuses_task.delay"), patch("bookwyrm.lists_stream.remove_user_lists_task.delay"), patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"), + patch( + "bookwyrm.activitypub.resolve_remote_id", return_value=self.badger_user + ), ): - models.bookwyrm_import_job.upsert_user_blocks( - self.local_user, self.json_data.get("blocks") - ) + bookwyrm_import_job.import_user_relationship_task(child_id=task.id) blocked_after = models.UserBlocks.objects.filter( Q( @@ -248,37 +266,29 @@ def test_upsert_user_blocks(self): self.assertTrue(blocked_after) def test_get_or_create_edition_existing(self): - """Test take a JSON string of books and editions, - find or create the editions in the database and - return a list of edition instances""" + """Test import existing book""" + + task = bookwyrm_import_job.UserImportBook.objects.create( + parent_job=self.job, + book_data=self.json_data["books"][1], + ) self.assertEqual(models.Edition.objects.count(), 1) - with ( - open(self.archive_file, "rb") as fileobj, - BookwyrmTarFile.open(mode="r:gz", fileobj=fileobj) as tarfile, - ): - bookwyrm_import_job.get_or_create_edition( - self.json_data["books"][1], tarfile - ) # Sand Talk + bookwyrm_import_job.import_book_task(child_id=task.id) - self.assertEqual(models.Edition.objects.count(), 1) + self.assertEqual(models.Edition.objects.count(), 1) def test_get_or_create_edition_not_existing(self): - """Test take a JSON string of books and editions, - find or create the editions in the database and - return a list of edition instances""" - - self.assertEqual(models.Edition.objects.count(), 1) + """Test import new book""" - with ( - open(self.archive_file, "rb") as fileobj, - BookwyrmTarFile.open(mode="r:gz", fileobj=fileobj) as tarfile, - ): - bookwyrm_import_job.get_or_create_edition( - self.json_data["books"][0], tarfile - ) # Seeing like a state + task = bookwyrm_import_job.UserImportBook.objects.create( + parent_job=self.job, + book_data=self.json_data["books"][0], + ) + self.assertEqual(models.Edition.objects.count(), 1) + bookwyrm_import_job.import_book_task(child_id=task.id) self.assertTrue(models.Edition.objects.filter(isbn_13="9780300070163").exists()) self.assertEqual(models.Edition.objects.count(), 2) @@ -305,7 +315,7 @@ def test_upsert_readthroughs(self): self.assertEqual(models.ReadThrough.objects.count(), 0) bookwyrm_import_job.upsert_readthroughs( - readthroughs, self.local_user, self.book.id + self.local_user, self.book.id, readthroughs ) self.assertEqual(models.ReadThrough.objects.count(), 1) @@ -320,15 +330,17 @@ def test_upsert_readthroughs(self): def test_get_or_create_review(self): """Test get_or_create_review_status with a review""" + task = bookwyrm_import_job.UserImportPost.objects.create( + parent_job=self.job, + book=self.book, + json=self.json_data["books"][0]["reviews"][0], + status_type="review", + ) + self.assertEqual(models.Review.objects.filter(user=self.local_user).count(), 0) - reviews = self.json_data["books"][0]["reviews"] - with ( - patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"), - patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True), - ): - bookwyrm_import_job.upsert_statuses( - self.local_user, models.Review, reviews, self.book.remote_id - ) + + with patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True): + bookwyrm_import_job.upsert_status_task(child_id=task.id) self.assertEqual(models.Review.objects.filter(user=self.local_user).count(), 1) self.assertEqual( @@ -356,16 +368,18 @@ def test_get_or_create_review(self): def test_get_or_create_comment(self): """Test get_or_create_review_status with a comment""" + task = bookwyrm_import_job.UserImportPost.objects.create( + parent_job=self.job, + book=self.book, + json=self.json_data["books"][1]["comments"][0], + status_type="comment", + ) + self.assertEqual(models.Comment.objects.filter(user=self.local_user).count(), 0) - comments = self.json_data["books"][1]["comments"] - with ( - patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"), - patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True), - ): - bookwyrm_import_job.upsert_statuses( - self.local_user, models.Comment, comments, self.book.remote_id - ) + with patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True): + bookwyrm_import_job.upsert_status_task(child_id=task.id) + self.assertEqual(models.Comment.objects.filter(user=self.local_user).count(), 1) self.assertEqual( models.Comment.objects.filter(book=self.book).first().content, @@ -384,18 +398,20 @@ def test_get_or_create_comment(self): def test_get_or_create_quote(self): """Test get_or_create_review_status with a quote""" + task = bookwyrm_import_job.UserImportPost.objects.create( + parent_job=self.job, + book=self.book, + json=self.json_data["books"][1]["quotations"][0], + status_type="quote", + ) + self.assertEqual( models.Quotation.objects.filter(user=self.local_user).count(), 0 ) - quotes = self.json_data["books"][1]["quotations"] - with ( - patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"), - patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True), - ): - bookwyrm_import_job.upsert_statuses( - self.local_user, models.Quotation, quotes, self.book.remote_id - ) + with patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=True): + bookwyrm_import_job.upsert_status_task(child_id=task.id) + self.assertEqual( models.Quotation.objects.filter(user=self.local_user).count(), 1 ) @@ -418,18 +434,18 @@ def test_get_or_create_quote(self): def test_get_or_create_quote_unauthorized(self): """Test get_or_create_review_status with a quote but not authorized""" + task = bookwyrm_import_job.UserImportPost.objects.create( + parent_job=self.job, + book=self.book, + json=self.json_data["books"][1]["quotations"][0], + status="quote", + ) + self.assertEqual( models.Quotation.objects.filter(user=self.local_user).count(), 0 ) - quotes = self.json_data["books"][1]["quotations"] - with ( - patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"), - patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=False), - ): - - bookwyrm_import_job.upsert_statuses( - self.local_user, models.Quotation, quotes, self.book.remote_id - ) + with patch("bookwyrm.models.bookwyrm_import_job.is_alias", return_value=False): + bookwyrm_import_job.upsert_status_task(child_id=task.id) self.assertEqual( models.Quotation.objects.filter(user=self.local_user).count(), 0 ) @@ -438,8 +454,6 @@ def test_upsert_list_existing(self): """Take a list and ListItems as JSON and create DB entries if they don't already exist""" - book_data = self.json_data["books"][0] - other_book = models.Edition.objects.create( title="Another Book", remote_id="https://example.com/book/9876" ) @@ -471,8 +485,8 @@ def test_upsert_list_existing(self): ): bookwyrm_import_job.upsert_lists( self.local_user, - book_data["lists"], other_book.id, + self.json_data["books"][0]["lists"], ) self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 1) @@ -488,8 +502,6 @@ def test_upsert_list_not_existing(self): """Take a list and ListItems as JSON and create DB entries if they don't already exist""" - book_data = self.json_data["books"][0] - self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 0) self.assertFalse(models.ListItem.objects.filter(book=self.book.id).exists()) @@ -499,8 +511,8 @@ def test_upsert_list_not_existing(self): ): bookwyrm_import_job.upsert_lists( self.local_user, - book_data["lists"], self.book.id, + self.json_data["books"][0]["lists"], ) self.assertEqual(models.List.objects.filter(user=self.local_user).count(), 1) @@ -526,12 +538,13 @@ def test_upsert_shelves_existing(self): book=self.book, shelf=shelf, user=self.local_user ) - book_data = self.json_data["books"][0] with ( patch("bookwyrm.activitystreams.add_book_statuses_task.delay"), patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"), ): - bookwyrm_import_job.upsert_shelves(self.book, self.local_user, book_data) + bookwyrm_import_job.upsert_shelves( + self.local_user, self.book, self.json_data["books"][0].get("shelves") + ) self.assertEqual( models.ShelfBook.objects.filter(user=self.local_user.id).count(), 2 @@ -545,13 +558,13 @@ def test_upsert_shelves_not_existing(self): models.ShelfBook.objects.filter(user=self.local_user.id).count(), 0 ) - book_data = self.json_data["books"][0] - with ( patch("bookwyrm.activitystreams.add_book_statuses_task.delay"), patch("bookwyrm.models.activitypub_mixin.broadcast_task.apply_async"), ): - bookwyrm_import_job.upsert_shelves(self.book, self.local_user, book_data) + bookwyrm_import_job.upsert_shelves( + self.local_user, self.book, self.json_data["books"][0].get("shelves") + ) self.assertEqual( models.ShelfBook.objects.filter(user=self.local_user.id).count(), 2 diff --git a/bookwyrm/views/imports/import_data.py b/bookwyrm/views/imports/import_data.py index 8ae7a0f0d2..3e02a8faa2 100644 --- a/bookwyrm/views/imports/import_data.py +++ b/bookwyrm/views/imports/import_data.py @@ -189,6 +189,9 @@ def post(self, request): def user_import_available(user: models.User) -> Optional[tuple[datetime, int]]: + """for a given user, determine whether they are allowed to run + a user import and if not, return a tuple with the next available + time they can import, and how many hours between imports allowed""" jobs = BookwyrmImportJob.objects.filter(user=user).order_by("-created_date") site = models.SiteSettings.objects.get() @@ -199,6 +202,6 @@ def user_import_available(user: models.User) -> Optional[tuple[datetime, int]]: else True ) if allowed and site.imports_enabled: - return + return False return (jobs.first().created_date + datetime.timedelta(hours=hours), hours) diff --git a/bookwyrm/views/imports/user_troubleshoot.py b/bookwyrm/views/imports/user_troubleshoot.py index 5b06f8fe25..bf2fed7e5c 100644 --- a/bookwyrm/views/imports/user_troubleshoot.py +++ b/bookwyrm/views/imports/user_troubleshoot.py @@ -9,7 +9,7 @@ from django.views import View from bookwyrm import models -from bookwyrm.importers import Importer, BookwyrmImporter +from bookwyrm.importers import BookwyrmImporter from bookwyrm.views import user_import_available from bookwyrm.settings import PAGE_LENGTH From ae3e67c7fbaf49517147be1e14a0798507bf7ee1 Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sun, 8 Sep 2024 15:50:34 +1000 Subject: [PATCH 07/16] fix user imports and add tests - adds tests for importing books in user import - fixes bug where new authors were not added to Work, throwing error --- bookwyrm/models/bookwyrm_import_job.py | 12 +- bookwyrm/tests/data/user_import.json | 2 +- .../tests/models/test_bookwyrm_import_job.py | 212 +++++++++++++++++- 3 files changed, 211 insertions(+), 15 deletions(-) diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index d184c6afda..384383e5a6 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -216,19 +216,20 @@ def import_book_task(**kwargs): # pylint: disable=too-many-locals,too-many-bran try: edition = book_data.get("edition") + work = book_data.get("work") book = models.Edition.find_existing(edition) if not book: # make sure we have the authors in the local DB # replace the old author ids in the edition JSON edition["authors"] = [] + work["authors"] = [] for author in book_data.get("authors"): instance = activitypub.parse(author).to_model( - model=models.Author, - save=True, - overwrite=True, # TODO: why do we use overwrite? (check with test) + model=models.Author, save=True, overwrite=False ) edition["authors"].append(instance.remote_id) + work["authors"].append(instance.remote_id) # we will add the cover later from the tar # don't try to load it from the old server @@ -237,17 +238,16 @@ def import_book_task(**kwargs): # pylint: disable=too-many-locals,too-many-bran edition["cover"] = {} # first we need the parent work to exist - work = book_data.get("work") work["editions"] = [] work_instance = activitypub.parse(work).to_model( - model=models.Work, save=True, overwrite=True + model=models.Work, save=True, overwrite=False ) # now we have a work we can add it to the edition # and create the edition model instance edition["work"] = work_instance.remote_id book = activitypub.parse(edition).to_model( - model=models.Edition, save=True, overwrite=True + model=models.Edition, save=True, overwrite=False ) # set the cover image from the tar diff --git a/bookwyrm/tests/data/user_import.json b/bookwyrm/tests/data/user_import.json index 5c0e22ea0c..6b7d2eca2b 100644 --- a/bookwyrm/tests/data/user_import.json +++ b/bookwyrm/tests/data/user_import.json @@ -86,7 +86,7 @@ "id": "https://www.example.com/book/2", "type": "Edition", "openlibraryKey": "OL680025M", - "title": "Seeking Like A State", + "title": "Seeing Like A State", "sortTitle": "seeing like a state", "subtitle": "", "description": "

    Examines how (sometimes quasi-) authoritarian high-modernist planning fails to deliver the goods, be they increased resources for the state or a better life for the people.

    ", diff --git a/bookwyrm/tests/models/test_bookwyrm_import_job.py b/bookwyrm/tests/models/test_bookwyrm_import_job.py index e9e6f2600b..e802c1de81 100644 --- a/bookwyrm/tests/models/test_bookwyrm_import_job.py +++ b/bookwyrm/tests/models/test_bookwyrm_import_job.py @@ -1,14 +1,16 @@ """ testing models """ import json +import os import pathlib from unittest.mock import patch +from django.core.files import File from django.db.models import Q from django.utils.dateparse import parse_datetime from django.test import TestCase -from bookwyrm import models +from bookwyrm import activitypub, models from bookwyrm.utils.tar import BookwyrmTarFile from bookwyrm.models import bookwyrm_import_job @@ -72,8 +74,10 @@ def setUp(self): with open(self.json_file, "r", encoding="utf-8") as jsonfile: self.json_data = json.loads(jsonfile.read()) - self.archive_file = pathlib.Path(__file__).parent.joinpath( - "../data/bookwyrm_account_export.tar.gz" + self.archive_file_path = os.path.relpath( + pathlib.Path(__file__).parent.joinpath( + "../data/bookwyrm_account_export.tar.gz" + ) ) self.job = bookwyrm_import_job.BookwyrmImportJob.objects.create( @@ -89,7 +93,7 @@ def test_update_user_profile(self): patch("bookwyrm.suggested_users.rerank_user_task.delay"), ): with ( - open(self.archive_file, "rb") as fileobj, + open(self.archive_file_path, "rb") as fileobj, BookwyrmTarFile.open(mode="r:gz", fileobj=fileobj) as tarfile, ): models.bookwyrm_import_job.update_user_profile( @@ -229,6 +233,150 @@ def test_follow_relationship(self): ).exists() self.assertTrue(after_follow) + def test_import_book_task_existing_author(self): + """Test importing a book with an author + already known to the server does not overwrite""" + + self.assertEqual(models.Author.objects.count(), 0) + self.author = models.Author.objects.create( + id=1, + name="James C. Scott", + wikipedia_link="https://en.wikipedia.org/wiki/James_C._Scott", + wikidata="Q3025403", + aliases=["Test Alias"], + ) + + with open(self.archive_file_path, "rb") as fileobj: + self.job.archive_file = File(fileobj) + self.job.save() + task = bookwyrm_import_job.UserImportBook.objects.create( + parent_job=self.job, book_data=self.json_data.get("books")[0] + ) + + self.assertEqual(models.Edition.objects.count(), 1) + + # run the task + bookwyrm_import_job.import_book_task(child_id=task.id) + + self.assertTrue(models.Edition.objects.filter(isbn_13="9780300070163").exists()) + self.assertEqual(models.Edition.objects.count(), 2) + + # Check the existing author did not get overwritten + author = models.Author.objects.first() + self.assertEqual(author.name, "James C. Scott") + self.assertIn(author.aliases[0], "Test Alias") + + def test_import_book_task_existing_edition(self): + """Test importing a book with an edition + already known to the server does not overwrite""" + + with open(self.archive_file_path, "rb") as fileobj: + self.job.archive_file = File(fileobj) + self.job.save() + task = bookwyrm_import_job.UserImportBook.objects.create( + parent_job=self.job, book_data=self.json_data.get("books")[1] + ) + + self.assertEqual(models.Edition.objects.count(), 1) + self.assertTrue(models.Edition.objects.filter(isbn_13="9780062975645").exists()) + + # run the task + bookwyrm_import_job.import_book_task(child_id=task.id) + + # Check the existing Edition did not get overwritten + self.assertEqual(models.Edition.objects.count(), 1) + self.assertEqual(models.Edition.objects.first().title, "Sand Talk") + + def test_import_book_task_existing_work(self): + """Test importing a book with a work unknown to the server""" + + with open(self.archive_file_path, "rb") as fileobj: + self.job.archive_file = File(fileobj) + self.job.save() + task = bookwyrm_import_job.UserImportBook.objects.create( + parent_job=self.job, book_data=self.json_data.get("books")[1] + ) + + self.assertEqual(models.Work.objects.count(), 1) + + # run the task + bookwyrm_import_job.import_book_task(child_id=task.id) + + # Check the existing Work did not get overwritten + self.assertEqual(models.Work.objects.count(), 1) + self.assertNotEqual( + self.json_data.get("books")[1]["work"]["title"], models.Work.objects.first() + ) + + def test_import_book_task_new_author(self): + """Test importing a book with author not known + to the server imports the new author""" + + with open(self.archive_file_path, "rb") as fileobj: + self.job.archive_file = File(fileobj) + self.job.save() + task = bookwyrm_import_job.UserImportBook.objects.create( + parent_job=self.job, book_data=self.json_data.get("books")[0] + ) + + self.assertEqual(models.Edition.objects.count(), 1) + + # run the task + bookwyrm_import_job.import_book_task(child_id=task.id) + + self.assertTrue(models.Edition.objects.filter(isbn_13="9780300070163").exists()) + self.assertEqual(models.Edition.objects.count(), 2) + + # Check the author was created + author = models.Author.objects.get() + self.assertEqual(author.name, "James C. Scott") + self.assertIn(author.aliases[0], "James Campbell Scott") + + def test_import_book_task_new_edition(self): + """Test importing a book with an edition + unknown to the server""" + + with open(self.archive_file_path, "rb") as fileobj: + self.job.archive_file = File(fileobj) + self.job.save() + task = bookwyrm_import_job.UserImportBook.objects.create( + parent_job=self.job, book_data=self.json_data.get("books")[0] + ) + + self.assertEqual(models.Edition.objects.count(), 1) + self.assertFalse( + models.Edition.objects.filter(isbn_13="9780300070163").exists() + ) + + # run the task + bookwyrm_import_job.import_book_task(child_id=task.id) + + # Check the Edition was added + self.assertEqual(models.Edition.objects.count(), 2) + self.assertEqual(models.Edition.objects.first().title, "Sand Talk") + self.assertEqual(models.Edition.objects.last().title, "Seeing Like A State") + self.assertTrue(models.Edition.objects.filter(isbn_13="9780300070163").exists()) + + def test_import_book_task_new_work(self): + """Test importing a book with a work unknown to the server""" + + with open(self.archive_file_path, "rb") as fileobj: + self.job.archive_file = File(fileobj) + self.job.save() + task = bookwyrm_import_job.UserImportBook.objects.create( + parent_job=self.job, book_data=self.json_data.get("books")[0] + ) + + self.assertEqual(models.Work.objects.count(), 1) + + # run the task + bookwyrm_import_job.import_book_task(child_id=task.id) + + # Check the Work was added + self.assertEqual(models.Work.objects.count(), 2) + self.assertEqual(models.Work.objects.first().title, "Sand Talk") + self.assertEqual(models.Work.objects.last().title, "Seeing Like a State") + def test_block_relationship(self): """test adding blocks for users""" @@ -328,7 +476,7 @@ def test_upsert_readthroughs(self): self.assertEqual(models.ReadThrough.objects.first().user, self.local_user) def test_get_or_create_review(self): - """Test get_or_create_review_status with a review""" + """Test upsert_status_task with a review""" task = bookwyrm_import_job.UserImportPost.objects.create( parent_job=self.job, @@ -366,7 +514,7 @@ def test_get_or_create_review(self): ) def test_get_or_create_comment(self): - """Test get_or_create_review_status with a comment""" + """Test upsert_status_task with a comment""" task = bookwyrm_import_job.UserImportPost.objects.create( parent_job=self.job, @@ -396,7 +544,7 @@ def test_get_or_create_comment(self): ) def test_get_or_create_quote(self): - """Test get_or_create_review_status with a quote""" + """Test upsert_status_task with a quote""" task = bookwyrm_import_job.UserImportPost.objects.create( parent_job=self.job, @@ -432,7 +580,7 @@ def test_get_or_create_quote(self): ) def test_get_or_create_quote_unauthorized(self): - """Test get_or_create_review_status with a quote but not authorized""" + """Test upsert_status_task with a quote but not authorized""" task = bookwyrm_import_job.UserImportPost.objects.create( parent_job=self.job, @@ -574,3 +722,51 @@ def test_upsert_shelves_not_existing(self): self.assertEqual( models.Shelf.objects.filter(user=self.local_user.id).count(), 4 ) + + def test_update_followers_address(self): + """test updating followers address to local""" + + user = self.local_user + followers = ["https://old.address/user/oldusername/followers"] + new_followers = bookwyrm_import_job.update_followers_address(user, followers) + + self.assertEqual(new_followers, [f"{self.local_user.remote_id}/followers"]) + + def test_is_alias(self): + """test checking for valid alias""" + + self.rat_user.also_known_as.add(self.local_user) + + with patch( + "bookwyrm.activitypub.resolve_remote_id", return_value=self.rat_user + ): + + alias = bookwyrm_import_job.is_alias( + self.local_user, self.rat_user.remote_id + ) + + self.assertTrue(alias) + + def test_status_already_exists(self): + """test status checking""" + + string = '{"id":"https://www.example.com/user/rat/comment/4","type":"Comment","published":"2023-08-14T04:48:18.746+00:00","attributedTo":"https://www.example.com/user/rat","content":"

    this is a comment about an amazing book

    ","to":["https://www.w3.org/ns/activitystreams#Public"],"cc":["https://www.example.com/user/rat/followers"],"replies":{"id":"https://www.example.com/user/rat/comment/4/replies","type":"OrderedCollection","totalItems":0,"first":"https://www.example.com/user/rat/comment/4/replies?page=1","last":"https://www.example.com/user/rat/comment/4/replies?page=1","@context":"https://www.w3.org/ns/activitystreams"},"tag":[],"attachment":[],"sensitive":false,"inReplyToBook":"https://www.example.com/book/4","readingStatus":null,"@context":"https://www.w3.org/ns/activitystreams"}' + + status = json.loads(string) + parsed = activitypub.parse(status) + exists = bookwyrm_import_job.status_already_exists(self.local_user, parsed) + + self.assertFalse(exists) + + comment = models.Comment.objects.create( + user=self.local_user, book=self.book, content="

    hi

    " + ) + status_two = comment.to_activity() + parsed_two = activitypub.parse(status_two) + + post = models.Status.objects.filter(user=self.local_user).first() + exists_two = bookwyrm_import_job.status_already_exists( + self.local_user, parsed_two + ) + + self.assertTrue(exists_two) From 5be99af5d0b0d958bfe23daf91415bde4302959f Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Sun, 8 Sep 2024 15:53:19 +1000 Subject: [PATCH 08/16] add average import and export times for users --- bookwyrm/templates/import/import_user.html | 15 ++++++++ .../templates/preferences/export-user.html | 31 ++++++++++++---- bookwyrm/views/imports/import_data.py | 31 ++++++++++++++++ bookwyrm/views/preferences/export.py | 35 ++++++++++++++++++- 4 files changed, 105 insertions(+), 7 deletions(-) diff --git a/bookwyrm/templates/import/import_user.html b/bookwyrm/templates/import/import_user.html index 0f4bf4f5df..8b590871f7 100644 --- a/bookwyrm/templates/import/import_user.html +++ b/bookwyrm/templates/import/import_user.html @@ -33,6 +33,21 @@

    {% blocktrans with next_time=next_available.0 %}You will next be able to import a user file at {{ next_time }}{% endblocktrans %}

    {% else %} + {% if recent_avg_hours or recent_avg_minutes %} +
    +

    + {% if recent_avg_hours %} + {% blocktrans trimmed with hours=recent_avg_hours|floatformat:0|intcomma %} + On average, recent imports have taken {{ hours }} hours. + {% endblocktrans %} + {% else %} + {% blocktrans trimmed with minutes=recent_avg_minutes|floatformat:0|intcomma %} + On average, recent imports have taken {{ minutes }} minutes. + {% endblocktrans %} + {% endif %} +

    +
    + {% endif %}
    {% csrf_token %} diff --git a/bookwyrm/templates/preferences/export-user.html b/bookwyrm/templates/preferences/export-user.html index bd675afaab..de0c32b55f 100644 --- a/bookwyrm/templates/preferences/export-user.html +++ b/bookwyrm/templates/preferences/export-user.html @@ -1,5 +1,6 @@ {% extends 'preferences/layout.html' %} {% load i18n %} +{% load humanize %} {% load utilities %} {% block title %}{% trans "Export BookWyrm Account" %}{% endblock %} @@ -48,12 +49,12 @@

    {% trans "Your file will not include:" %}

    {% trans "New user exports are currently disabled." %} {% if perms.bookwyrm.edit_instance_settings %} -
    - {% url 'settings-imports' as url %} - {% blocktrans trimmed %} - User exports settings can be changed from the Imports page in the Admin dashboard. - {% endblocktrans %} - {% endif%} +
    + {% url 'settings-imports' as url %} + {% blocktrans trimmed %} + User exports settings can be changed from the Imports page in the Admin dashboard. + {% endblocktrans %} + {% endif%}

    {% elif next_available %}

    @@ -61,7 +62,25 @@

    {% trans "Your file will not include:" %}

    You will be able to create a new export file at {{ next_available }} {% endblocktrans %}

    + {% else %} + + {% if recent_avg_hours or recent_avg_minutes %} +
    +

    + {% if recent_avg_hours %} + {% blocktrans trimmed with hours=recent_avg_hours|floatformat:0|intcomma %} + On average, recent exports have taken {{ hours }} hours. + {% endblocktrans %} + {% else %} + {% blocktrans trimmed with minutes=recent_avg_minutes|floatformat:0|intcomma %} + On average, recent exports have taken {{ minutes }} minutes. + {% endblocktrans %} + {% endif %} +

    +
    + {% endif %} + {% csrf_token %}
    {% endblock %} \ No newline at end of file From cd7f4656ef8f92d1f81722dd780e86c93080753e Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Thu, 17 Oct 2024 15:57:35 -0700 Subject: [PATCH 13/16] Fixes error in handle display of import statuses --- bookwyrm/templates/preferences/export-user.html | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bookwyrm/templates/preferences/export-user.html b/bookwyrm/templates/preferences/export-user.html index de0c32b55f..0a7e8f044a 100644 --- a/bookwyrm/templates/preferences/export-user.html +++ b/bookwyrm/templates/preferences/export-user.html @@ -126,14 +126,13 @@

    {% trans "Recent Exports" %}

    {% elif export.job.status == "pending" %} class="tag is-warning" {% elif export.job.complete %} - class="tag" - {% else %} class="tag is-success" + {% else %} + class="tag" {% endif %} > {% if export.job.status %} - {{ export.job.status }} - {{ export.job.status_display }} + {{ export.job.get_status_display }} {% elif export.job.complete %} {% trans "Complete" %} {% else %} From b1092587694ea29eb8a65b26df43bbca02d8b08a Mon Sep 17 00:00:00 2001 From: Mouse Reeve Date: Thu, 17 Oct 2024 16:00:41 -0700 Subject: [PATCH 14/16] Update import_user.html --- bookwyrm/templates/import/import_user.html | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/bookwyrm/templates/import/import_user.html b/bookwyrm/templates/import/import_user.html index 8b590871f7..3468c74c02 100644 --- a/bookwyrm/templates/import/import_user.html +++ b/bookwyrm/templates/import/import_user.html @@ -212,14 +212,13 @@

    {% trans "Recent Imports" %}

    {% elif job.status == "pending" %} class="tag is-warning" {% elif job.complete %} - class="tag" - {% else %} class="tag is-success" + {% else %} + class="tag" {% endif %} > {% if job.status %} - {{ job.status }} - {{ job.status_display }} + {{ job.get_status_display }} {% elif job.complete %} {% trans "Complete" %} {% else %} From d3b689bba428011840952cadd32248371b7fe52f Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Mon, 21 Oct 2024 10:03:43 +1100 Subject: [PATCH 15/16] refactor Job move fail_reason into Job definition Bump aiohttp from 3.9.4 to 3.10.2 Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.9.4 to 3.10.2. - [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.9.4...v3.10.2) --- updated-dependencies: - dependency-name: aiohttp dependency-type: direct:production ... Signed-off-by: dependabot[bot] Convert min_confidence param to string to appease mypy docker-compose 'version' has been deprecated remove unused TextField import --- bookwyrm/connectors/abstract_connector.py | 2 +- ...ove_userimportbook_fail_reason_and_more.py | 35 +++++++++++++++++++ bookwyrm/models/bookwyrm_import_job.py | 4 --- bookwyrm/models/job.py | 1 + docker-compose.yml | 2 -- requirements.txt | 2 +- 6 files changed, 38 insertions(+), 8 deletions(-) create mode 100644 bookwyrm/migrations/0211_remove_userimportbook_fail_reason_and_more.py diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index aa8edbeae9..4be10f4660 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -86,7 +86,7 @@ async def get_results( ), "User-Agent": USER_AGENT, } - params = {"min_confidence": min_confidence} + params = {"min_confidence": str(min_confidence)} try: async with session.get(url, headers=headers, params=params) as response: if not response.ok: diff --git a/bookwyrm/migrations/0211_remove_userimportbook_fail_reason_and_more.py b/bookwyrm/migrations/0211_remove_userimportbook_fail_reason_and_more.py new file mode 100644 index 0000000000..6c3a3dcb4a --- /dev/null +++ b/bookwyrm/migrations/0211_remove_userimportbook_fail_reason_and_more.py @@ -0,0 +1,35 @@ +# Generated by Django 4.2.15 on 2024-10-20 22:29 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("bookwyrm", "0210_userrelationshipimport_and_more"), + ] + + operations = [ + migrations.RemoveField( + model_name="userimportbook", + name="fail_reason", + ), + migrations.RemoveField( + model_name="userimportpost", + name="fail_reason", + ), + migrations.RemoveField( + model_name="userrelationshipimport", + name="fail_reason", + ), + migrations.AddField( + model_name="childjob", + name="fail_reason", + field=models.TextField(null=True), + ), + migrations.AddField( + model_name="parentjob", + name="fail_reason", + field=models.TextField(null=True), + ), + ] diff --git a/bookwyrm/models/bookwyrm_import_job.py b/bookwyrm/models/bookwyrm_import_job.py index 384383e5a6..798b2814da 100644 --- a/bookwyrm/models/bookwyrm_import_job.py +++ b/bookwyrm/models/bookwyrm_import_job.py @@ -9,7 +9,6 @@ ForeignKey, FileField, JSONField, - TextField, TextChoices, PROTECT, SET_NULL, @@ -95,7 +94,6 @@ class UserImportBook(ChildJob): book = ForeignKey(models.Book, on_delete=SET_NULL, null=True, blank=True) book_data = JSONField(null=False) - fail_reason = TextField(null=True) def start_job(self): """Start the job""" @@ -119,7 +117,6 @@ class StatusType(TextChoices): status_type = models.fields.CharField( max_length=10, choices=StatusType.choices, default=StatusType.COMMENT, null=True ) - fail_reason = TextField(null=True) def start_job(self): """Start the job""" @@ -139,7 +136,6 @@ class RelationshipType(TextChoices): max_length=10, choices=RelationshipType.choices, null=True ) remote_id = models.fields.RemoteIdField(null=True, unique=False) - fail_reason = TextField(null=True) def start_job(self): """Start the job""" diff --git a/bookwyrm/models/job.py b/bookwyrm/models/job.py index 4d25c14404..ff4895dc60 100644 --- a/bookwyrm/models/job.py +++ b/bookwyrm/models/job.py @@ -29,6 +29,7 @@ class Status(models.TextChoices): status = models.CharField( max_length=50, choices=Status.choices, default=Status.PENDING, null=True ) + fail_reason = models.TextField(null=True) class Meta: """Make it abstract""" diff --git a/docker-compose.yml b/docker-compose.yml index 634c021b6a..6ac30bbc16 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,5 +1,3 @@ -version: '3' - services: nginx: image: nginx:1.25.2 diff --git a/requirements.txt b/requirements.txt index 49bc810bcf..64ef099c7d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -aiohttp==3.9.4 +aiohttp==3.10.2 bleach==6.1.0 boto3==1.34.74 bw-file-resubmit==0.6.0rc2 From a0dd61335dc48de960381c7c5db11adec93449bd Mon Sep 17 00:00:00 2001 From: Hugh Rundle Date: Mon, 21 Oct 2024 13:05:39 +1100 Subject: [PATCH 16/16] template fixes --- bookwyrm/templates/import/user_troubleshoot.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bookwyrm/templates/import/user_troubleshoot.html b/bookwyrm/templates/import/user_troubleshoot.html index a92eae718f..350914881f 100644 --- a/bookwyrm/templates/import/user_troubleshoot.html +++ b/bookwyrm/templates/import/user_troubleshoot.html @@ -70,8 +70,8 @@ {% for item in items %} - {{ item.userimportpost.book.title }} - {{ item.userimportpost.json.name }} + {{ item.userimportpost.book.title }} + {{ item.userimportpost.json.type }} {% id_to_username item.userrelationshipimport.remote_id True %} {% if item.fail_reason == "unauthorized" %}