diff --git a/.dockerignore b/.dockerignore index a5130c8bd1..5edf3de0d9 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,3 +5,4 @@ __pycache__ .git .github .pytest* +.env diff --git a/.env.example b/.env.example index 2be08224b8..fb0f7308d1 100644 --- a/.env.example +++ b/.env.example @@ -8,7 +8,7 @@ USE_HTTPS=true DOMAIN=your.domain.here EMAIL=your@email.here -# Instance defualt language (see options at bookwyrm/settings.py "LANGUAGES" +# Instance default language (see options at bookwyrm/settings.py "LANGUAGES" LANGUAGE_CODE="en-us" # Used for deciding which editions to prefer DEFAULT_LANGUAGE="English" @@ -82,6 +82,12 @@ AWS_SECRET_ACCESS_KEY= # AWS_S3_REGION_NAME=None # "fr-par" # AWS_S3_ENDPOINT_URL=None # "https://s3.fr-par.scw.cloud" +# Commented are example values if you use Azure Blob Storage +# USE_AZURE=true +# AZURE_ACCOUNT_NAME= # "example-account-name" +# AZURE_ACCOUNT_KEY= # "base64-encoded-access-key" +# AZURE_CONTAINER= # "example-blob-container-name" +# AZURE_CUSTOM_DOMAIN= # "example-account-name.blob.core.windows.net" # Preview image generation can be computing and storage intensive ENABLE_PREVIEW_IMAGES=False diff --git a/bookwyrm/activitypub/base_activity.py b/bookwyrm/activitypub/base_activity.py index 840dab6a43..9b7897ebaa 100644 --- a/bookwyrm/activitypub/base_activity.py +++ b/bookwyrm/activitypub/base_activity.py @@ -127,7 +127,7 @@ def to_model( if ( allow_create and hasattr(model, "ignore_activity") - and model.ignore_activity(self) + and model.ignore_activity(self, allow_external_connections) ): return None @@ -241,7 +241,7 @@ def serialize(self, **kwargs): return data -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) @transaction.atomic def set_related_field( model_name, origin_model_name, related_field_name, related_remote_id, data @@ -384,7 +384,8 @@ def get_activitypub_data(url): resp = requests.get( url, headers={ - "Accept": "application/json; charset=utf-8", + # pylint: disable=line-too-long + "Accept": 'application/ld+json; profile="https://www.w3.org/ns/activitystreams"', "Date": now, "Signature": make_signature("get", sender, url, now), }, diff --git a/bookwyrm/activitystreams.py b/bookwyrm/activitystreams.py index d4dac14120..5d581d564e 100644 --- a/bookwyrm/activitystreams.py +++ b/bookwyrm/activitystreams.py @@ -4,10 +4,15 @@ from django.db import transaction from django.db.models import signals, Q from django.utils import timezone +from opentelemetry import trace from bookwyrm import models from bookwyrm.redis_store import RedisStore, r from bookwyrm.tasks import app, LOW, MEDIUM, HIGH +from bookwyrm.telemetry import open_telemetry + + +tracer = open_telemetry.tracer() class ActivityStream(RedisStore): @@ -33,11 +38,14 @@ def get_rank(self, obj): # pylint: disable=no-self-use def add_status(self, status, increment_unread=False): """add a status to users' feeds""" + audience = self.get_audience(status) # the pipeline contains all the add-to-stream activities - pipeline = self.add_object_to_related_stores(status, execute=False) + pipeline = self.add_object_to_stores( + status, self.get_stores_for_users(audience), execute=False + ) if increment_unread: - for user_id in self.get_audience(status): + for user_id in audience: # add to the unread status count pipeline.incr(self.unread_id(user_id)) # add to the unread status count for status type @@ -97,11 +105,18 @@ def populate_streams(self, user): """go from zero to a timeline""" self.populate_store(self.stream_id(user.id)) + @tracer.start_as_current_span("ActivityStream._get_audience") def _get_audience(self, status): # pylint: disable=no-self-use - """given a status, what users should see it""" - # direct messages don't appeard in feeds, direct comments/reviews/etc do + """given a status, what users should see it, excluding the author""" + trace.get_current_span().set_attribute("status_type", status.status_type) + trace.get_current_span().set_attribute("status_privacy", status.privacy) + trace.get_current_span().set_attribute( + "status_reply_parent_privacy", + status.reply_parent.privacy if status.reply_parent else None, + ) + # direct messages don't appear in feeds, direct comments/reviews/etc do if status.privacy == "direct" and status.status_type == "Note": - return [] + return models.User.objects.none() # everybody who could plausibly see this status audience = models.User.objects.filter( @@ -114,15 +129,13 @@ def _get_audience(self, status): # pylint: disable=no-self-use # only visible to the poster and mentioned users if status.privacy == "direct": audience = audience.filter( - Q(id=status.user.id) # if the user is the post's author - | Q(id__in=status.mention_users.all()) # if the user is mentioned + Q(id__in=status.mention_users.all()) # if the user is mentioned ) # don't show replies to statuses the user can't see elif status.reply_parent and status.reply_parent.privacy == "followers": audience = audience.filter( - Q(id=status.user.id) # if the user is the post's author - | Q(id=status.reply_parent.user.id) # if the user is the OG author + Q(id=status.reply_parent.user.id) # if the user is the OG author | ( Q(following=status.user) & Q(following=status.reply_parent.user) ) # if the user is following both authors @@ -131,17 +144,23 @@ def _get_audience(self, status): # pylint: disable=no-self-use # only visible to the poster's followers and tagged users elif status.privacy == "followers": audience = audience.filter( - Q(id=status.user.id) # if the user is the post's author - | Q(following=status.user) # if the user is following the author + Q(following=status.user) # if the user is following the author ) return audience.distinct() - def get_audience(self, status): # pylint: disable=no-self-use + @tracer.start_as_current_span("ActivityStream.get_audience") + def get_audience(self, status): """given a status, what users should see it""" - return [user.id for user in self._get_audience(status)] + trace.get_current_span().set_attribute("stream_id", self.key) + audience = self._get_audience(status).values_list("id", flat=True) + status_author = models.User.objects.filter( + is_active=True, local=True, id=status.user.id + ).values_list("id", flat=True) + return list(set(list(audience) + list(status_author))) - def get_stores_for_object(self, obj): - return [self.stream_id(user_id) for user_id in self.get_audience(obj)] + def get_stores_for_users(self, user_ids): + """convert a list of user ids into redis store ids""" + return [self.stream_id(user_id) for user_id in user_ids] def get_statuses_for_user(self, user): # pylint: disable=no-self-use """given a user, what statuses should they see on this stream""" @@ -160,15 +179,19 @@ class HomeStream(ActivityStream): key = "home" + @tracer.start_as_current_span("HomeStream.get_audience") def get_audience(self, status): + trace.get_current_span().set_attribute("stream_id", self.key) audience = super()._get_audience(status) if not audience: return [] - # if the user is the post's author - ids_self = [user.id for user in audience.filter(Q(id=status.user.id))] # if the user is following the author - ids_following = [user.id for user in audience.filter(Q(following=status.user))] - return ids_self + ids_following + audience = audience.filter(following=status.user).values_list("id", flat=True) + # if the user is the post's author + status_author = models.User.objects.filter( + is_active=True, local=True, id=status.user.id + ).values_list("id", flat=True) + return list(set(list(audience) + list(status_author))) def get_statuses_for_user(self, user): return models.Status.privacy_filter( @@ -188,11 +211,11 @@ class LocalStream(ActivityStream): key = "local" - def _get_audience(self, status): + def get_audience(self, status): # this stream wants no part in non-public statuses if status.privacy != "public" or not status.user.local: return [] - return super()._get_audience(status) + return super().get_audience(status) def get_statuses_for_user(self, user): # all public statuses by a local user @@ -209,13 +232,6 @@ class BooksStream(ActivityStream): def _get_audience(self, status): """anyone with the mentioned book on their shelves""" - # only show public statuses on the books feed, - # and only statuses that mention books - if status.privacy != "public" or not ( - status.mention_books.exists() or hasattr(status, "book") - ): - return [] - work = ( status.book.parent_work if hasattr(status, "book") @@ -224,9 +240,19 @@ def _get_audience(self, status): audience = super()._get_audience(status) if not audience: - return [] + return models.User.objects.none() return audience.filter(shelfbook__book__parent_work=work).distinct() + def get_audience(self, status): + # only show public statuses on the books feed, + # and only statuses that mention books + if status.privacy != "public" or not ( + status.mention_books.exists() or hasattr(status, "book") + ): + return [] + + return super().get_audience(status) + def get_statuses_for_user(self, user): """any public status that mentions the user's books""" books = user.shelfbook_set.values_list( @@ -471,7 +497,7 @@ def remove_statuses_on_unshelve(sender, instance, *args, **kwargs): # ---- TASKS -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def add_book_statuses_task(user_id, book_id): """add statuses related to a book on shelve""" user = models.User.objects.get(id=user_id) @@ -479,7 +505,7 @@ def add_book_statuses_task(user_id, book_id): BooksStream().add_book_statuses(user, book) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def remove_book_statuses_task(user_id, book_id): """remove statuses about a book from a user's books feed""" user = models.User.objects.get(id=user_id) @@ -487,7 +513,7 @@ def remove_book_statuses_task(user_id, book_id): BooksStream().remove_book_statuses(user, book) -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def populate_stream_task(stream, user_id): """background task for populating an empty activitystream""" user = models.User.objects.get(id=user_id) @@ -495,7 +521,7 @@ def populate_stream_task(stream, user_id): stream.populate_streams(user) -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def remove_status_task(status_ids): """remove a status from any stream it might be in""" # this can take an id or a list of ids @@ -505,10 +531,12 @@ def remove_status_task(status_ids): for stream in streams.values(): for status in statuses: - stream.remove_object_from_related_stores(status) + stream.remove_object_from_stores( + status, stream.get_stores_for_users(stream.get_audience(status)) + ) -@app.task(queue=HIGH, ignore_result=True) +@app.task(queue=HIGH) def add_status_task(status_id, increment_unread=False): """add a status to any stream it should be in""" status = models.Status.objects.select_subclasses().get(id=status_id) @@ -520,7 +548,7 @@ def add_status_task(status_id, increment_unread=False): stream.add_status(status, increment_unread=increment_unread) -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def remove_user_statuses_task(viewer_id, user_id, stream_list=None): """remove all statuses by a user from a viewer's stream""" stream_list = [streams[s] for s in stream_list] if stream_list else streams.values() @@ -530,7 +558,7 @@ def remove_user_statuses_task(viewer_id, user_id, stream_list=None): stream.remove_user_statuses(viewer, user) -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def add_user_statuses_task(viewer_id, user_id, stream_list=None): """add all statuses by a user to a viewer's stream""" stream_list = [streams[s] for s in stream_list] if stream_list else streams.values() @@ -540,7 +568,7 @@ def add_user_statuses_task(viewer_id, user_id, stream_list=None): stream.add_user_statuses(viewer, user) -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def handle_boost_task(boost_id): """remove the original post and other, earlier boosts""" instance = models.Status.objects.get(id=boost_id) @@ -554,10 +582,10 @@ def handle_boost_task(boost_id): for stream in streams.values(): # people who should see the boost (not people who see the original status) - audience = stream.get_stores_for_object(instance) - stream.remove_object_from_related_stores(boosted, stores=audience) + audience = stream.get_stores_for_users(stream.get_audience(instance)) + stream.remove_object_from_stores(boosted, audience) for status in old_versions: - stream.remove_object_from_related_stores(status, stores=audience) + stream.remove_object_from_stores(status, audience) def get_status_type(status): diff --git a/bookwyrm/apps.py b/bookwyrm/apps.py index 786f86e1c6..b0c3e3fa4c 100644 --- a/bookwyrm/apps.py +++ b/bookwyrm/apps.py @@ -35,11 +35,12 @@ class BookwyrmConfig(AppConfig): # pylint: disable=no-self-use def ready(self): """set up OTLP and preview image files, if desired""" - if settings.OTEL_EXPORTER_OTLP_ENDPOINT: + if settings.OTEL_EXPORTER_OTLP_ENDPOINT or settings.OTEL_EXPORTER_CONSOLE: # pylint: disable=import-outside-toplevel from bookwyrm.telemetry import open_telemetry open_telemetry.instrumentDjango() + open_telemetry.instrumentPostgres() if settings.ENABLE_PREVIEW_IMAGES and settings.FONTS: # Download any fonts that we don't have yet diff --git a/bookwyrm/connectors/abstract_connector.py b/bookwyrm/connectors/abstract_connector.py index 0e04ffaf25..950bb11f98 100644 --- a/bookwyrm/connectors/abstract_connector.py +++ b/bookwyrm/connectors/abstract_connector.py @@ -4,13 +4,16 @@ import imghdr import logging import re +import asyncio +import requests +from requests.exceptions import RequestException +import aiohttp from django.core.files.base import ContentFile from django.db import transaction -import requests -from requests.exceptions import RequestException from bookwyrm import activitypub, models, settings +from bookwyrm.settings import USER_AGENT from .connector_manager import load_more_data, ConnectorException, raise_not_valid_url from .format_mappings import format_mappings @@ -52,11 +55,44 @@ def get_search_url(self, query): return f"{self.search_url}{quote_plus(query)}" def process_search_response(self, query, data, min_confidence): - """Format the search results based on the formt of the query""" + """Format the search results based on the format of the query""" if maybe_isbn(query): return list(self.parse_isbn_search_data(data))[:10] return list(self.parse_search_data(data, min_confidence))[:10] + async def get_results(self, session, url, min_confidence, query): + """try this specific connector""" + # pylint: disable=line-too-long + headers = { + "Accept": ( + 'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8' + ), + "User-Agent": USER_AGENT, + } + params = {"min_confidence": min_confidence} + try: + async with session.get(url, headers=headers, params=params) as response: + if not response.ok: + logger.info("Unable to connect to %s: %s", url, response.reason) + return + + try: + raw_data = await response.json() + except aiohttp.client_exceptions.ContentTypeError as err: + logger.exception(err) + return + + return { + "connector": self, + "results": self.process_search_response( + query, raw_data, min_confidence + ), + } + except asyncio.TimeoutError: + logger.info("Connection timed out for url: %s", url) + except aiohttp.ClientError as err: + logger.info(err) + @abstractmethod def get_or_create_book(self, remote_id): """pull up a book record by whatever means possible""" @@ -321,7 +357,7 @@ def infer_physical_format(format_text): def unique_physical_format(format_text): - """only store the format if it isn't diretly in the format mappings""" + """only store the format if it isn't directly in the format mappings""" format_text = format_text.lower() if format_text in format_mappings: # try a direct match, so saving this would be redundant diff --git a/bookwyrm/connectors/connector_manager.py b/bookwyrm/connectors/connector_manager.py index 4330d4ac26..7e823c0afa 100644 --- a/bookwyrm/connectors/connector_manager.py +++ b/bookwyrm/connectors/connector_manager.py @@ -12,7 +12,7 @@ from requests import HTTPError from bookwyrm import book_search, models -from bookwyrm.settings import SEARCH_TIMEOUT, USER_AGENT +from bookwyrm.settings import SEARCH_TIMEOUT from bookwyrm.tasks import app, LOW logger = logging.getLogger(__name__) @@ -22,40 +22,6 @@ class ConnectorException(HTTPError): """when the connector can't do what was asked""" -async def get_results(session, url, min_confidence, query, connector): - """try this specific connector""" - # pylint: disable=line-too-long - headers = { - "Accept": ( - 'application/json, application/activity+json, application/ld+json; profile="https://www.w3.org/ns/activitystreams"; charset=utf-8' - ), - "User-Agent": USER_AGENT, - } - params = {"min_confidence": min_confidence} - try: - async with session.get(url, headers=headers, params=params) as response: - if not response.ok: - logger.info("Unable to connect to %s: %s", url, response.reason) - return - - try: - raw_data = await response.json() - except aiohttp.client_exceptions.ContentTypeError as err: - logger.exception(err) - return - - return { - "connector": connector, - "results": connector.process_search_response( - query, raw_data, min_confidence - ), - } - except asyncio.TimeoutError: - logger.info("Connection timed out for url: %s", url) - except aiohttp.ClientError as err: - logger.info(err) - - async def async_connector_search(query, items, min_confidence): """Try a number of requests simultaneously""" timeout = aiohttp.ClientTimeout(total=SEARCH_TIMEOUT) @@ -64,7 +30,7 @@ async def async_connector_search(query, items, min_confidence): for url, connector in items: tasks.append( asyncio.ensure_future( - get_results(session, url, min_confidence, query, connector) + connector.get_results(session, url, min_confidence, query) ) ) @@ -73,7 +39,7 @@ async def async_connector_search(query, items, min_confidence): def search(query, min_confidence=0.1, return_first=False): - """find books based on arbitary keywords""" + """find books based on arbitrary keywords""" if not query: return [] results = [] @@ -143,7 +109,7 @@ def get_or_create_connector(remote_id): return load_connector(connector_info) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def load_more_data(connector_id, book_id): """background the work of getting all 10,000 editions of LoTR""" connector_info = models.Connector.objects.get(id=connector_id) @@ -152,7 +118,7 @@ def load_more_data(connector_id, book_id): connector.expand_book_data(book) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def create_edition_task(connector_id, work_id, data): """separate task for each of the 10,000 editions of LoTR""" connector_info = models.Connector.objects.get(id=connector_id) diff --git a/bookwyrm/connectors/inventaire.py b/bookwyrm/connectors/inventaire.py index a330b2c4a5..f3e24c0ec5 100644 --- a/bookwyrm/connectors/inventaire.py +++ b/bookwyrm/connectors/inventaire.py @@ -97,7 +97,7 @@ def parse_search_data(self, data, min_confidence): ) def parse_isbn_search_data(self, data): - """got some daaaata""" + """got some data""" results = data.get("entities") if not results: return diff --git a/bookwyrm/emailing.py b/bookwyrm/emailing.py index 1640c0b733..2271077b12 100644 --- a/bookwyrm/emailing.py +++ b/bookwyrm/emailing.py @@ -75,7 +75,7 @@ def format_email(email_name, data): return (subject, html_content, text_content) -@app.task(queue=HIGH, ignore_result=True) +@app.task(queue=HIGH) def send_email(recipient, subject, html_content, text_content): """use a task to send the email""" email = EmailMultiAlternatives( diff --git a/bookwyrm/forms/admin.py b/bookwyrm/forms/admin.py index 1ad1581191..72f50ccb87 100644 --- a/bookwyrm/forms/admin.py +++ b/bookwyrm/forms/admin.py @@ -15,7 +15,7 @@ # pylint: disable=missing-class-docstring class ExpiryWidget(widgets.Select): def value_from_datadict(self, data, files, name): - """human-readable exiration time buckets""" + """human-readable expiration time buckets""" selected_string = super().value_from_datadict(data, files, name) if selected_string == "day": diff --git a/bookwyrm/importers/librarything_import.py b/bookwyrm/importers/librarything_import.py index c6833547d4..ea31b46eb6 100644 --- a/bookwyrm/importers/librarything_import.py +++ b/bookwyrm/importers/librarything_import.py @@ -19,7 +19,7 @@ def normalize_row(self, entry, mappings): # pylint: disable=no-self-use normalized = {k: remove_brackets(entry.get(v)) for k, v in mappings.items()} isbn_13 = normalized.get("isbn_13") isbn_13 = isbn_13.split(", ") if isbn_13 else [] - normalized["isbn_13"] = isbn_13[1] if len(isbn_13) > 0 else None + normalized["isbn_13"] = isbn_13[1] if len(isbn_13) > 1 else None return normalized def get_shelf(self, normalized_row): diff --git a/bookwyrm/lists_stream.py b/bookwyrm/lists_stream.py index 7426488cec..2b08010b12 100644 --- a/bookwyrm/lists_stream.py +++ b/bookwyrm/lists_stream.py @@ -24,8 +24,7 @@ def get_rank(self, obj): # pylint: disable=no-self-use def add_list(self, book_list): """add a list to users' feeds""" - # the pipeline contains all the add-to-stream activities - self.add_object_to_related_stores(book_list) + self.add_object_to_stores(book_list, self.get_stores_for_object(book_list)) def add_user_lists(self, viewer, user): """add a user's lists to another user's feed""" @@ -86,18 +85,19 @@ def get_audience(self, book_list): # pylint: disable=no-self-use if group: audience = audience.filter( Q(id=book_list.user.id) # if the user is the list's owner - | Q(following=book_list.user) # if the user is following the pwmer + | Q(following=book_list.user) # if the user is following the owner # if a user is in the group | Q(memberships__group__id=book_list.group.id) ) else: audience = audience.filter( Q(id=book_list.user.id) # if the user is the list's owner - | Q(following=book_list.user) # if the user is following the pwmer + | Q(following=book_list.user) # if the user is following the owner ) return audience.distinct() def get_stores_for_object(self, obj): + """the stores that an object belongs in""" return [self.stream_id(u) for u in self.get_audience(obj)] def get_lists_for_user(self, user): # pylint: disable=no-self-use @@ -217,14 +217,14 @@ def add_list_on_account_create_command(user_id): # ---- TASKS -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def populate_lists_task(user_id): """background task for populating an empty list stream""" user = models.User.objects.get(id=user_id) ListsStream().populate_lists(user) -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def remove_list_task(list_id, re_add=False): """remove a list from any stream it might be in""" stores = models.User.objects.filter(local=True, is_active=True).values_list( @@ -233,20 +233,20 @@ def remove_list_task(list_id, re_add=False): # delete for every store stores = [ListsStream().stream_id(idx) for idx in stores] - ListsStream().remove_object_from_related_stores(list_id, stores=stores) + ListsStream().remove_object_from_stores(list_id, stores) if re_add: add_list_task.delay(list_id) -@app.task(queue=HIGH, ignore_result=True) +@app.task(queue=HIGH) def add_list_task(list_id): """add a list to any stream it should be in""" book_list = models.List.objects.get(id=list_id) ListsStream().add_list(book_list) -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def remove_user_lists_task(viewer_id, user_id, exclude_privacy=None): """remove all lists by a user from a viewer's stream""" viewer = models.User.objects.get(id=viewer_id) @@ -254,7 +254,7 @@ def remove_user_lists_task(viewer_id, user_id, exclude_privacy=None): ListsStream().remove_user_lists(viewer, user, exclude_privacy=exclude_privacy) -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def add_user_lists_task(viewer_id, user_id): """add all lists by a user to a viewer's stream""" viewer = models.User.objects.get(id=viewer_id) diff --git a/bookwyrm/management/commands/deduplicate_book_data.py b/bookwyrm/management/commands/deduplicate_book_data.py index ed01a78433..dde7d133c5 100644 --- a/bookwyrm/management/commands/deduplicate_book_data.py +++ b/bookwyrm/management/commands/deduplicate_book_data.py @@ -3,38 +3,7 @@ from django.core.management.base import BaseCommand from django.db.models import Count from bookwyrm import models - - -def update_related(canonical, obj): - """update all the models with fk to the object being removed""" - # move related models to canonical - related_models = [ - (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects - ] - for (related_field, related_model) in related_models: - related_objs = related_model.objects.filter(**{related_field: obj}) - for related_obj in related_objs: - print("replacing in", related_model.__name__, related_field, related_obj.id) - try: - setattr(related_obj, related_field, canonical) - related_obj.save() - except TypeError: - getattr(related_obj, related_field).add(canonical) - getattr(related_obj, related_field).remove(obj) - - -def copy_data(canonical, obj): - """try to get the most data possible""" - for data_field in obj._meta.get_fields(): - if not hasattr(data_field, "activitypub_field"): - continue - data_value = getattr(obj, data_field.name) - if not data_value: - continue - if not getattr(canonical, data_field.name): - print("setting data field", data_field.name, data_value) - setattr(canonical, data_field.name, data_value) - canonical.save() +from bookwyrm.management.merge import merge_objects def dedupe_model(model): @@ -61,19 +30,16 @@ def dedupe_model(model): print("keeping", canonical.remote_id) for obj in objs[1:]: print(obj.remote_id) - copy_data(canonical, obj) - update_related(canonical, obj) - # remove the outdated entry - obj.delete() + merge_objects(canonical, obj) class Command(BaseCommand): - """dedplucate allllll the book data models""" + """deduplicate allllll the book data models""" help = "merges duplicate book data" # pylint: disable=no-self-use,unused-argument def handle(self, *args, **options): - """run deudplications""" + """run deduplications""" dedupe_model(models.Edition) dedupe_model(models.Work) dedupe_model(models.Author) diff --git a/bookwyrm/management/commands/merge_authors.py b/bookwyrm/management/commands/merge_authors.py new file mode 100644 index 0000000000..7465df1479 --- /dev/null +++ b/bookwyrm/management/commands/merge_authors.py @@ -0,0 +1,12 @@ +""" PROCEED WITH CAUTION: uses deduplication fields to permanently +merge author data objects """ +from bookwyrm import models +from bookwyrm.management.merge_command import MergeCommand + + +class Command(MergeCommand): + """merges two authors by ID""" + + help = "merges specified authors into one" + + MODEL = models.Author diff --git a/bookwyrm/management/commands/merge_editions.py b/bookwyrm/management/commands/merge_editions.py new file mode 100644 index 0000000000..9ed6962019 --- /dev/null +++ b/bookwyrm/management/commands/merge_editions.py @@ -0,0 +1,12 @@ +""" PROCEED WITH CAUTION: uses deduplication fields to permanently +merge edition data objects """ +from bookwyrm import models +from bookwyrm.management.merge_command import MergeCommand + + +class Command(MergeCommand): + """merges two editions by ID""" + + help = "merges specified editions into one" + + MODEL = models.Edition diff --git a/bookwyrm/management/commands/merge_works.py b/bookwyrm/management/commands/merge_works.py new file mode 100644 index 0000000000..619d0509ac --- /dev/null +++ b/bookwyrm/management/commands/merge_works.py @@ -0,0 +1,12 @@ +""" PROCEED WITH CAUTION: uses deduplication fields to permanently +merge work data objects """ +from bookwyrm import models +from bookwyrm.management.merge_command import MergeCommand + + +class Command(MergeCommand): + """merges two works by ID""" + + help = "merges specified works into one" + + MODEL = models.Work diff --git a/bookwyrm/management/commands/remove_editions.py b/bookwyrm/management/commands/remove_editions.py index 9eb9b7da8d..5cb430a93b 100644 --- a/bookwyrm/management/commands/remove_editions.py +++ b/bookwyrm/management/commands/remove_editions.py @@ -33,10 +33,10 @@ def remove_editions(): class Command(BaseCommand): - """dedplucate allllll the book data models""" + """deduplicate allllll the book data models""" help = "merges duplicate book data" # pylint: disable=no-self-use,unused-argument def handle(self, *args, **options): - """run deudplications""" + """run deduplications""" remove_editions() diff --git a/bookwyrm/management/commands/revoke_preview_image_tasks.py b/bookwyrm/management/commands/revoke_preview_image_tasks.py index 6d6e59e8fb..7b0947b12c 100644 --- a/bookwyrm/management/commands/revoke_preview_image_tasks.py +++ b/bookwyrm/management/commands/revoke_preview_image_tasks.py @@ -9,7 +9,7 @@ class Command(BaseCommand): # pylint: disable=unused-argument def handle(self, *args, **options): - """reveoke nonessential low priority tasks""" + """revoke nonessential low priority tasks""" types = [ "bookwyrm.preview_images.generate_edition_preview_image_task", "bookwyrm.preview_images.generate_user_preview_image_task", diff --git a/bookwyrm/management/merge.py b/bookwyrm/management/merge.py new file mode 100644 index 0000000000..f55229f18d --- /dev/null +++ b/bookwyrm/management/merge.py @@ -0,0 +1,50 @@ +from django.db.models import ManyToManyField + + +def update_related(canonical, obj): + """update all the models with fk to the object being removed""" + # move related models to canonical + related_models = [ + (r.remote_field.name, r.related_model) for r in canonical._meta.related_objects + ] + for (related_field, related_model) in related_models: + # Skip the ManyToMany fields that aren’t auto-created. These + # should have a corresponding OneToMany field in the model for + # the linking table anyway. If we update it through that model + # instead then we won’t lose the extra fields in the linking + # table. + related_field_obj = related_model._meta.get_field(related_field) + if isinstance(related_field_obj, ManyToManyField): + through = related_field_obj.remote_field.through + if not through._meta.auto_created: + continue + related_objs = related_model.objects.filter(**{related_field: obj}) + for related_obj in related_objs: + print("replacing in", related_model.__name__, related_field, related_obj.id) + try: + setattr(related_obj, related_field, canonical) + related_obj.save() + except TypeError: + getattr(related_obj, related_field).add(canonical) + getattr(related_obj, related_field).remove(obj) + + +def copy_data(canonical, obj): + """try to get the most data possible""" + for data_field in obj._meta.get_fields(): + if not hasattr(data_field, "activitypub_field"): + continue + data_value = getattr(obj, data_field.name) + if not data_value: + continue + if not getattr(canonical, data_field.name): + print("setting data field", data_field.name, data_value) + setattr(canonical, data_field.name, data_value) + canonical.save() + + +def merge_objects(canonical, obj): + copy_data(canonical, obj) + update_related(canonical, obj) + # remove the outdated entry + obj.delete() diff --git a/bookwyrm/management/merge_command.py b/bookwyrm/management/merge_command.py new file mode 100644 index 0000000000..805dc73fa4 --- /dev/null +++ b/bookwyrm/management/merge_command.py @@ -0,0 +1,29 @@ +from bookwyrm.management.merge import merge_objects +from django.core.management.base import BaseCommand + + +class MergeCommand(BaseCommand): + """base class for merge commands""" + + def add_arguments(self, parser): + """add the arguments for this command""" + parser.add_argument("--canonical", type=int, required=True) + parser.add_argument("--other", type=int, required=True) + + # pylint: disable=no-self-use,unused-argument + def handle(self, *args, **options): + """merge the two objects""" + model = self.MODEL + + try: + canonical = model.objects.get(id=options["canonical"]) + except model.DoesNotExist: + print("canonical book doesn’t exist!") + return + try: + other = model.objects.get(id=options["other"]) + except model.DoesNotExist: + print("other book doesn’t exist!") + return + + merge_objects(canonical, other) diff --git a/bookwyrm/migrations/0006_auto_20200221_1702_squashed_0064_merge_20201101_1913.py b/bookwyrm/migrations/0006_auto_20200221_1702_squashed_0064_merge_20201101_1913.py index c06fa40a03..f25bafe157 100644 --- a/bookwyrm/migrations/0006_auto_20200221_1702_squashed_0064_merge_20201101_1913.py +++ b/bookwyrm/migrations/0006_auto_20200221_1702_squashed_0064_merge_20201101_1913.py @@ -1467,7 +1467,7 @@ class Migration(migrations.Migration): ( "expiry", models.DateTimeField( - default=bookwyrm.models.site.get_passowrd_reset_expiry + default=bookwyrm.models.site.get_password_reset_expiry ), ), ( diff --git a/bookwyrm/migrations/0101_auto_20210929_1847.py b/bookwyrm/migrations/0101_auto_20210929_1847.py index 3fca28eace..967b59819e 100644 --- a/bookwyrm/migrations/0101_auto_20210929_1847.py +++ b/bookwyrm/migrations/0101_auto_20210929_1847.py @@ -6,7 +6,7 @@ def infer_format(app_registry, schema_editor): - """set the new phsyical format field based on existing format data""" + """set the new physical format field based on existing format data""" db_alias = schema_editor.connection.alias editions = ( diff --git a/bookwyrm/migrations/0102_remove_connector_local.py b/bookwyrm/migrations/0102_remove_connector_local.py index 857f0f589a..9bfd8b1d0b 100644 --- a/bookwyrm/migrations/0102_remove_connector_local.py +++ b/bookwyrm/migrations/0102_remove_connector_local.py @@ -5,7 +5,7 @@ def remove_self_connector(app_registry, schema_editor): - """set the new phsyical format field based on existing format data""" + """set the new physical format field based on existing format data""" db_alias = schema_editor.connection.alias app_registry.get_model("bookwyrm", "Connector").objects.using(db_alias).filter( connector_file="self_connector" diff --git a/bookwyrm/migrations/0178_auto_20230328_2132.py b/bookwyrm/migrations/0178_auto_20230328_2132.py new file mode 100644 index 0000000000..9decc001f8 --- /dev/null +++ b/bookwyrm/migrations/0178_auto_20230328_2132.py @@ -0,0 +1,61 @@ +# Generated by Django 3.2.18 on 2023-03-28 21:32 + +import bookwyrm.models.fields +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("auth", "0012_alter_user_first_name_max_length"), + ("bookwyrm", "0177_merge_0174_auto_20230222_1742_0176_hashtag_support"), + ] + + operations = [ + migrations.AlterField( + model_name="hashtag", + name="name", + field=bookwyrm.models.fields.CICharField(max_length=256), + ), + migrations.AlterField( + model_name="sitesettings", + name="default_user_auth_group", + field=models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.RESTRICT, + to="auth.group", + ), + ), + migrations.AlterField( + model_name="user", + name="preferred_language", + field=models.CharField( + blank=True, + choices=[ + ("en-us", "English"), + ("ca-es", "Català (Catalan)"), + ("de-de", "Deutsch (German)"), + ("eo-uy", "Esperanto (Esperanto)"), + ("es-es", "Español (Spanish)"), + ("eu-es", "Euskara (Basque)"), + ("gl-es", "Galego (Galician)"), + ("it-it", "Italiano (Italian)"), + ("fi-fi", "Suomi (Finnish)"), + ("fr-fr", "Français (French)"), + ("lt-lt", "Lietuvių (Lithuanian)"), + ("no-no", "Norsk (Norwegian)"), + ("pl-pl", "Polski (Polish)"), + ("pt-br", "Português do Brasil (Brazilian Portuguese)"), + ("pt-pt", "Português Europeu (European Portuguese)"), + ("ro-ro", "Română (Romanian)"), + ("sv-se", "Svenska (Swedish)"), + ("zh-hans", "简体中文 (Simplified Chinese)"), + ("zh-hant", "繁體中文 (Traditional Chinese)"), + ], + max_length=255, + null=True, + ), + ), + ] diff --git a/bookwyrm/models/activitypub_mixin.py b/bookwyrm/models/activitypub_mixin.py index 83ca90b0a5..e76433189e 100644 --- a/bookwyrm/models/activitypub_mixin.py +++ b/bookwyrm/models/activitypub_mixin.py @@ -25,7 +25,7 @@ from bookwyrm.models.fields import ImageField, ManyToManyField logger = logging.getLogger(__name__) -# I tried to separate these classes into mutliple files but I kept getting +# I tried to separate these classes into multiple files but I kept getting # circular import errors so I gave up. I'm sure it could be done though! PropertyField = namedtuple("PropertyField", ("set_activity_from_field")) @@ -91,7 +91,7 @@ def find_existing_by_remote_id(cls, remote_id): @classmethod def find_existing(cls, data): - """compare data to fields that can be used for deduplation. + """compare data to fields that can be used for deduplication. This always includes remote_id, but can also be unique identifiers like an isbn for an edition""" filters = [] @@ -234,8 +234,8 @@ def save(self, *args, created=None, software=None, priority=BROADCAST, **kwargs) activity = self.to_create_activity(user) self.broadcast(activity, user, software=software, queue=priority) except AttributeError: - # janky as heck, this catches the mutliple inheritence chain - # for boosts and ignores this auxilliary broadcast + # janky as heck, this catches the multiple inheritance chain + # for boosts and ignores this auxiliary broadcast return return @@ -311,7 +311,7 @@ class OrderedCollectionPageMixin(ObjectMixin): @property def collection_remote_id(self): - """this can be overriden if there's a special remote id, ie outbox""" + """this can be overridden if there's a special remote id, ie outbox""" return self.remote_id def to_ordered_collection( @@ -339,7 +339,7 @@ def to_ordered_collection( activity["id"] = remote_id paginated = Paginator(queryset, PAGE_LENGTH) - # add computed fields specific to orderd collections + # add computed fields specific to ordered collections activity["totalItems"] = paginated.count activity["first"] = f"{remote_id}?page=1" activity["last"] = f"{remote_id}?page={paginated.num_pages}" @@ -405,7 +405,7 @@ def save(self, *args, broadcast=True, priority=MEDIUM, **kwargs): # first off, we want to save normally no matter what super().save(*args, **kwargs) - # list items can be updateda, normally you would only broadcast on created + # list items can be updated, normally you would only broadcast on created if not broadcast or not self.user.local: return @@ -506,7 +506,7 @@ def unfurl_related_field(related_field, sort_field=None): return related_field.remote_id -@app.task(queue=BROADCAST, ignore_result=True) +@app.task(queue=BROADCAST) def broadcast_task(sender_id: int, activity: str, recipients: List[str]): """the celery task for broadcast""" user_model = apps.get_model("bookwyrm.User", require_ready=True) @@ -565,7 +565,7 @@ async def sign_and_send( def to_ordered_collection_page( queryset, remote_id, id_only=False, page=1, pure=False, **kwargs ): - """serialize and pagiante a queryset""" + """serialize and paginate a queryset""" paginated = Paginator(queryset, PAGE_LENGTH) activity_page = paginated.get_page(page) diff --git a/bookwyrm/models/annual_goal.py b/bookwyrm/models/annual_goal.py index 0eefacb32b..d36b822df2 100644 --- a/bookwyrm/models/annual_goal.py +++ b/bookwyrm/models/annual_goal.py @@ -24,7 +24,7 @@ class AnnualGoal(BookWyrmModel): ) class Meta: - """unqiueness constraint""" + """uniqueness constraint""" unique_together = ("user", "year") diff --git a/bookwyrm/models/antispam.py b/bookwyrm/models/antispam.py index c3afadf287..1e20df3408 100644 --- a/bookwyrm/models/antispam.py +++ b/bookwyrm/models/antispam.py @@ -65,7 +65,7 @@ class AutoMod(AdminModel): created_by = models.ForeignKey("User", on_delete=models.PROTECT) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def automod_task(): """Create reports""" if not AutoMod.objects.exists(): diff --git a/bookwyrm/models/book.py b/bookwyrm/models/book.py index a5be51a298..4e7ffcad30 100644 --- a/bookwyrm/models/book.py +++ b/bookwyrm/models/book.py @@ -321,7 +321,7 @@ class Edition(Book): def get_rank(self): """calculate how complete the data is on this edition""" rank = 0 - # big ups for havinga cover + # big ups for having a cover rank += int(bool(self.cover)) * 3 # is it in the instance's preferred language? rank += int(bool(DEFAULT_LANGUAGE in self.languages)) diff --git a/bookwyrm/models/favorite.py b/bookwyrm/models/favorite.py index 4c36752191..98fbce550d 100644 --- a/bookwyrm/models/favorite.py +++ b/bookwyrm/models/favorite.py @@ -20,8 +20,9 @@ class Favorite(ActivityMixin, BookWyrmModel): activity_serializer = activitypub.Like + # pylint: disable=unused-argument @classmethod - def ignore_activity(cls, activity): + def ignore_activity(cls, activity, allow_external_connections=True): """don't bother with incoming favs of unknown statuses""" return not Status.objects.filter(remote_id=activity.object).exists() diff --git a/bookwyrm/models/fields.py b/bookwyrm/models/fields.py index 6cfe4c10c2..df4bb2e4a4 100644 --- a/bookwyrm/models/fields.py +++ b/bookwyrm/models/fields.py @@ -71,11 +71,11 @@ def __init__( def set_field_from_activity( self, instance, data, overwrite=True, allow_external_connections=True ): - """helper function for assinging a value to the field. Returns if changed""" + """helper function for assigning a value to the field. Returns if changed""" try: value = getattr(data, self.get_activitypub_field()) except AttributeError: - # masssively hack-y workaround for boosts + # massively hack-y workaround for boosts if self.get_activitypub_field() != "attributedTo": raise value = getattr(data, "actor") @@ -221,7 +221,7 @@ def field_to_activity(self, value): class PrivacyField(ActivitypubFieldMixin, models.CharField): - """this maps to two differente activitypub fields""" + """this maps to two different activitypub fields""" public = "https://www.w3.org/ns/activitystreams#Public" @@ -431,7 +431,7 @@ def __init__(self, *args, alt_field=None, **kwargs): def set_field_from_activity( self, instance, data, save=True, overwrite=True, allow_external_connections=True ): - """helper function for assinging a value to the field""" + """helper function for assigning a value to the field""" value = getattr(data, self.get_activitypub_field()) formatted = self.field_from_activity( value, allow_external_connections=allow_external_connections diff --git a/bookwyrm/models/import_job.py b/bookwyrm/models/import_job.py index 5f564d3907..a489edb7c4 100644 --- a/bookwyrm/models/import_job.py +++ b/bookwyrm/models/import_job.py @@ -252,9 +252,12 @@ def review(self): @property def rating(self): """x/5 star rating for a book""" - if self.normalized_data.get("rating"): + if not self.normalized_data.get("rating"): + return None + try: return float(self.normalized_data.get("rating")) - return None + except ValueError: + return None @property def date_added(self): @@ -327,7 +330,7 @@ def __str__(self): ) -@app.task(queue=IMPORTS, ignore_result=True) +@app.task(queue=IMPORTS) def start_import_task(job_id): """trigger the child tasks for each row""" job = ImportJob.objects.get(id=job_id) @@ -346,7 +349,7 @@ def start_import_task(job_id): job.save() -@app.task(queue=IMPORTS, ignore_result=True) +@app.task(queue=IMPORTS) def import_item_task(item_id): """resolve a row into a book""" item = ImportItem.objects.get(id=item_id) diff --git a/bookwyrm/models/link.py b/bookwyrm/models/link.py index 56b096bc2a..d334a9d29e 100644 --- a/bookwyrm/models/link.py +++ b/bookwyrm/models/link.py @@ -31,7 +31,7 @@ class Link(ActivitypubMixin, BookWyrmModel): @property def name(self): - """link name via the assocaited domain""" + """link name via the associated domain""" return self.domain.name def save(self, *args, **kwargs): diff --git a/bookwyrm/models/notification.py b/bookwyrm/models/notification.py index 29f7b0c2da..522038f9ab 100644 --- a/bookwyrm/models/notification.py +++ b/bookwyrm/models/notification.py @@ -284,7 +284,7 @@ def notify_user_on_list_item_add(sender, instance, created, *args, **kwargs): return list_owner = instance.book_list.user - # create a notification if somoene ELSE added to a local user's list + # create a notification if someone ELSE added to a local user's list if list_owner.local and list_owner != instance.user: # keep the related_user singular, group the items Notification.notify_list_item(list_owner, instance) diff --git a/bookwyrm/models/readthrough.py b/bookwyrm/models/readthrough.py index 239ec56be3..4911c715b7 100644 --- a/bookwyrm/models/readthrough.py +++ b/bookwyrm/models/readthrough.py @@ -8,7 +8,7 @@ class ProgressMode(models.TextChoices): - """types of prgress available""" + """types of progress available""" PAGE = "PG", "page" PERCENT = "PCT", "percent" diff --git a/bookwyrm/models/relationship.py b/bookwyrm/models/relationship.py index 422967855d..4754bea36d 100644 --- a/bookwyrm/models/relationship.py +++ b/bookwyrm/models/relationship.py @@ -34,7 +34,7 @@ def privacy(self): @property def recipients(self): - """the remote user needs to recieve direct broadcasts""" + """the remote user needs to receive direct broadcasts""" return [u for u in [self.user_subject, self.user_object] if not u.local] def save(self, *args, **kwargs): diff --git a/bookwyrm/models/shelf.py b/bookwyrm/models/shelf.py index 8e754bc471..c52cb6ab82 100644 --- a/bookwyrm/models/shelf.py +++ b/bookwyrm/models/shelf.py @@ -80,7 +80,7 @@ def raise_not_deletable(self, viewer): raise PermissionDenied() class Meta: - """user/shelf unqiueness""" + """user/shelf uniqueness""" unique_together = ("user", "identifier") diff --git a/bookwyrm/models/site.py b/bookwyrm/models/site.py index 35f007be21..a27c4b70d8 100644 --- a/bookwyrm/models/site.py +++ b/bookwyrm/models/site.py @@ -209,7 +209,7 @@ def save(self, *args, **kwargs): super().save(*args, **kwargs) -def get_passowrd_reset_expiry(): +def get_password_reset_expiry(): """give people a limited time to use the link""" now = timezone.now() return now + datetime.timedelta(days=1) @@ -219,7 +219,7 @@ class PasswordReset(models.Model): """gives someone access to create an account on the instance""" code = models.CharField(max_length=32, default=new_access_code) - expiry = models.DateTimeField(default=get_passowrd_reset_expiry) + expiry = models.DateTimeField(default=get_password_reset_expiry) user = models.OneToOneField(User, on_delete=models.CASCADE) def valid(self): diff --git a/bookwyrm/models/status.py b/bookwyrm/models/status.py index 1fcc9ee757..047d0aba6a 100644 --- a/bookwyrm/models/status.py +++ b/bookwyrm/models/status.py @@ -116,10 +116,16 @@ def recipients(self): return list(set(mentions)) @classmethod - def ignore_activity(cls, activity): # pylint: disable=too-many-return-statements + def ignore_activity( + cls, activity, allow_external_connections=True + ): # pylint: disable=too-many-return-statements """keep notes if they are replies to existing statuses""" if activity.type == "Announce": - boosted = activitypub.resolve_remote_id(activity.object, get_activity=True) + boosted = activitypub.resolve_remote_id( + activity.object, + get_activity=True, + allow_external_connections=allow_external_connections, + ) if not boosted: # if we can't load the status, definitely ignore it return True diff --git a/bookwyrm/models/user.py b/bookwyrm/models/user.py index 6d26b7b171..85e1f0edbc 100644 --- a/bookwyrm/models/user.py +++ b/bookwyrm/models/user.py @@ -469,7 +469,7 @@ def save(self, *args, **kwargs): return super().save(*args, **kwargs) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def set_remote_server(user_id): """figure out the user's remote server in the background""" user = User.objects.get(id=user_id) @@ -513,7 +513,7 @@ def get_or_create_remote_server(domain, refresh=False): return server -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def get_remote_reviews(outbox): """ingest reviews by a new remote bookwyrm user""" outbox_page = outbox + "?page=true&type=Review" diff --git a/bookwyrm/preview_images.py b/bookwyrm/preview_images.py index c218d87df3..549e124729 100644 --- a/bookwyrm/preview_images.py +++ b/bookwyrm/preview_images.py @@ -420,7 +420,7 @@ def save_and_cleanup(image, instance=None): # pylint: disable=invalid-name -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def generate_site_preview_image_task(): """generate preview_image for the website""" if not settings.ENABLE_PREVIEW_IMAGES: @@ -445,7 +445,7 @@ def generate_site_preview_image_task(): # pylint: disable=invalid-name -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def generate_edition_preview_image_task(book_id): """generate preview_image for a book""" if not settings.ENABLE_PREVIEW_IMAGES: @@ -470,7 +470,7 @@ def generate_edition_preview_image_task(book_id): save_and_cleanup(image, instance=book) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def generate_user_preview_image_task(user_id): """generate preview_image for a user""" if not settings.ENABLE_PREVIEW_IMAGES: @@ -496,7 +496,7 @@ def generate_user_preview_image_task(user_id): save_and_cleanup(image, instance=user) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def remove_user_preview_image_task(user_id): """remove preview_image for a user""" if not settings.ENABLE_PREVIEW_IMAGES: diff --git a/bookwyrm/redis_store.py b/bookwyrm/redis_store.py index f25829f5ce..e188487aaa 100644 --- a/bookwyrm/redis_store.py +++ b/bookwyrm/redis_store.py @@ -16,12 +16,12 @@ def get_value(self, obj): """the object and rank""" return {obj.id: self.get_rank(obj)} - def add_object_to_related_stores(self, obj, execute=True): - """add an object to all suitable stores""" + def add_object_to_stores(self, obj, stores, execute=True): + """add an object to a given set of stores""" value = self.get_value(obj) # we want to do this as a bulk operation, hence "pipeline" pipeline = r.pipeline() - for store in self.get_stores_for_object(obj): + for store in stores: # add the status to the feed pipeline.zadd(store, value) # trim the store @@ -32,14 +32,14 @@ def add_object_to_related_stores(self, obj, execute=True): # and go! return pipeline.execute() - def remove_object_from_related_stores(self, obj, stores=None): + # pylint: disable=no-self-use + def remove_object_from_stores(self, obj, stores): """remove an object from all stores""" - # if the stoers are provided, the object can just be an id + # if the stores are provided, the object can just be an id if stores and isinstance(obj, int): obj_id = obj else: obj_id = obj.id - stores = self.get_stores_for_object(obj) if stores is None else stores pipeline = r.pipeline() for store in stores: pipeline.zrem(store, -1, obj_id) @@ -82,10 +82,6 @@ def populate_store(self, store): def get_objects_for_store(self, store): """a queryset of what should go in a store, used for populating it""" - @abstractmethod - def get_stores_for_object(self, obj): - """the stores that an object belongs in""" - @abstractmethod def get_rank(self, obj): """how to rank an object""" diff --git a/bookwyrm/settings.py b/bookwyrm/settings.py index 3f14daf1b7..ab73115a1b 100644 --- a/bookwyrm/settings.py +++ b/bookwyrm/settings.py @@ -4,6 +4,7 @@ import requests from django.utils.translation import gettext_lazy as _ +from django.core.exceptions import ImproperlyConfigured # pylint: disable=line-too-long @@ -11,22 +12,22 @@ env = Env() env.read_env() DOMAIN = env("DOMAIN") -VERSION = "0.6.0" +VERSION = "0.6.2" RELEASE_API = env( "RELEASE_API", "https://api.github.com/repos/bookwyrm-social/bookwyrm/releases/latest", ) -PAGE_LENGTH = env("PAGE_LENGTH", 15) +PAGE_LENGTH = env.int("PAGE_LENGTH", 15) DEFAULT_LANGUAGE = env("DEFAULT_LANGUAGE", "English") -JS_CACHE = "a7d4e720" +JS_CACHE = "ea91d7df" # email EMAIL_BACKEND = env("EMAIL_BACKEND", "django.core.mail.backends.smtp.EmailBackend") EMAIL_HOST = env("EMAIL_HOST") -EMAIL_PORT = env("EMAIL_PORT", 587) +EMAIL_PORT = env.int("EMAIL_PORT", 587) EMAIL_HOST_USER = env("EMAIL_HOST_USER") EMAIL_HOST_PASSWORD = env("EMAIL_HOST_PASSWORD") EMAIL_USE_TLS = env.bool("EMAIL_USE_TLS", True) @@ -68,13 +69,15 @@ # Quick-start development settings - unsuitable for production # See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/ -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = env("SECRET_KEY") - # SECURITY WARNING: don't run with debug turned on in production! DEBUG = env.bool("DEBUG", True) USE_HTTPS = env.bool("USE_HTTPS", not DEBUG) +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = env("SECRET_KEY") +if not DEBUG and SECRET_KEY == "7(2w1sedok=aznpq)ta1mc4i%4h=xx@hxwx*o57ctsuml0x%fr": + raise ImproperlyConfigured("You must change the SECRET_KEY env variable") + ALLOWED_HOSTS = env.list("ALLOWED_HOSTS", ["*"]) # Application definition @@ -205,14 +208,14 @@ # redis/activity streams settings REDIS_ACTIVITY_HOST = env("REDIS_ACTIVITY_HOST", "localhost") -REDIS_ACTIVITY_PORT = env("REDIS_ACTIVITY_PORT", 6379) +REDIS_ACTIVITY_PORT = env.int("REDIS_ACTIVITY_PORT", 6379) REDIS_ACTIVITY_PASSWORD = requests.utils.quote(env("REDIS_ACTIVITY_PASSWORD", "")) -REDIS_ACTIVITY_DB_INDEX = env("REDIS_ACTIVITY_DB_INDEX", 0) +REDIS_ACTIVITY_DB_INDEX = env.int("REDIS_ACTIVITY_DB_INDEX", 0) REDIS_ACTIVITY_URL = env( "REDIS_ACTIVITY_URL", f"redis://:{REDIS_ACTIVITY_PASSWORD}@{REDIS_ACTIVITY_HOST}:{REDIS_ACTIVITY_PORT}/{REDIS_ACTIVITY_DB_INDEX}", ) -MAX_STREAM_LENGTH = int(env("MAX_STREAM_LENGTH", 200)) +MAX_STREAM_LENGTH = env.int("MAX_STREAM_LENGTH", 200) STREAMS = [ {"key": "home", "name": _("Home Timeline"), "shortname": _("Home")}, @@ -221,12 +224,12 @@ # Search configuration # total time in seconds that the instance will spend searching connectors -SEARCH_TIMEOUT = int(env("SEARCH_TIMEOUT", 8)) +SEARCH_TIMEOUT = env.int("SEARCH_TIMEOUT", 8) # timeout for a query to an individual connector -QUERY_TIMEOUT = int(env("QUERY_TIMEOUT", 5)) +QUERY_TIMEOUT = env.int("INTERACTIVE_QUERY_TIMEOUT", env.int("QUERY_TIMEOUT", 5)) # Redis cache backend -if env("USE_DUMMY_CACHE", False): +if env.bool("USE_DUMMY_CACHE", False): CACHES = { "default": { "BACKEND": "django.core.cache.backends.dummy.DummyCache", @@ -256,7 +259,7 @@ "USER": env("POSTGRES_USER", "bookwyrm"), "PASSWORD": env("POSTGRES_PASSWORD", "bookwyrm"), "HOST": env("POSTGRES_HOST", ""), - "PORT": env("PGPORT", 5432), + "PORT": env.int("PGPORT", 5432), }, } @@ -291,6 +294,7 @@ ("en-us", _("English")), ("ca-es", _("Català (Catalan)")), ("de-de", _("Deutsch (German)")), + ("eo-uy", _("Esperanto (Esperanto)")), ("es-es", _("Español (Spanish)")), ("eu-es", _("Euskara (Basque)")), ("gl-es", _("Galego (Galician)")), @@ -341,6 +345,7 @@ CSRF_COOKIE_SECURE = True USE_S3 = env.bool("USE_S3", False) +USE_AZURE = env.bool("USE_AZURE", False) if USE_S3: # AWS settings @@ -364,6 +369,27 @@ DEFAULT_FILE_STORAGE = "bookwyrm.storage_backends.ImagesStorage" CSP_DEFAULT_SRC = ["'self'", AWS_S3_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS CSP_SCRIPT_SRC = ["'self'", AWS_S3_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS +elif USE_AZURE: + AZURE_ACCOUNT_NAME = env("AZURE_ACCOUNT_NAME") + AZURE_ACCOUNT_KEY = env("AZURE_ACCOUNT_KEY") + AZURE_CONTAINER = env("AZURE_CONTAINER") + AZURE_CUSTOM_DOMAIN = env("AZURE_CUSTOM_DOMAIN") + # Azure Static settings + STATIC_LOCATION = "static" + STATIC_URL = ( + f"{PROTOCOL}://{AZURE_CUSTOM_DOMAIN}/{AZURE_CONTAINER}/{STATIC_LOCATION}/" + ) + STATICFILES_STORAGE = "bookwyrm.storage_backends.AzureStaticStorage" + # Azure Media settings + MEDIA_LOCATION = "images" + MEDIA_URL = ( + f"{PROTOCOL}://{AZURE_CUSTOM_DOMAIN}/{AZURE_CONTAINER}/{MEDIA_LOCATION}/" + ) + MEDIA_FULL_URL = MEDIA_URL + STATIC_FULL_URL = STATIC_URL + DEFAULT_FILE_STORAGE = "bookwyrm.storage_backends.AzureImagesStorage" + CSP_DEFAULT_SRC = ["'self'", AZURE_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS + CSP_SCRIPT_SRC = ["'self'", AZURE_CUSTOM_DOMAIN] + CSP_ADDITIONAL_HOSTS else: STATIC_URL = "/static/" MEDIA_URL = "/images/" @@ -377,6 +403,7 @@ OTEL_EXPORTER_OTLP_ENDPOINT = env("OTEL_EXPORTER_OTLP_ENDPOINT", None) OTEL_EXPORTER_OTLP_HEADERS = env("OTEL_EXPORTER_OTLP_HEADERS", None) OTEL_SERVICE_NAME = env("OTEL_SERVICE_NAME", None) +OTEL_EXPORTER_CONSOLE = env.bool("OTEL_EXPORTER_CONSOLE", False) TWO_FACTOR_LOGIN_MAX_SECONDS = env.int("TWO_FACTOR_LOGIN_MAX_SECONDS", 60) TWO_FACTOR_LOGIN_VALIDITY_WINDOW = env.int("TWO_FACTOR_LOGIN_VALIDITY_WINDOW", 2) diff --git a/bookwyrm/static/css/bookwyrm/components/_book_cover.scss b/bookwyrm/static/css/bookwyrm/components/_book_cover.scss index d1125197eb..48b564a0b7 100644 --- a/bookwyrm/static/css/bookwyrm/components/_book_cover.scss +++ b/bookwyrm/static/css/bookwyrm/components/_book_cover.scss @@ -5,7 +5,7 @@ * - .book-cover is positioned and sized based on its container. * * To have the cover within specific dimensions, specify a width or height for - * standard bulma’s named breapoints: + * standard bulma’s named breakpoints: * * `is-(w|h)-(auto|xs|s|m|l|xl|xxl)[-(mobile|tablet|desktop)]` * @@ -43,7 +43,7 @@ max-height: 100%; /* Useful when stretching under-sized images. */ - image-rendering: optimizequality; + image-rendering: optimizeQuality; image-rendering: smooth; } diff --git a/bookwyrm/static/css/bookwyrm/components/_tabs.scss b/bookwyrm/static/css/bookwyrm/components/_tabs.scss index 8e00f6a88b..2d68a383ba 100644 --- a/bookwyrm/static/css/bookwyrm/components/_tabs.scss +++ b/bookwyrm/static/css/bookwyrm/components/_tabs.scss @@ -44,12 +44,12 @@ .bw-tabs a:hover { border-bottom-color: transparent; - color: $text; + color: $text } .bw-tabs a.is-active { border-bottom-color: transparent; - color: $link; + color: $link } .bw-tabs.is-left { diff --git a/bookwyrm/static/css/themes/bookwyrm-dark.scss b/bookwyrm/static/css/themes/bookwyrm-dark.scss index ae904b4a41..c3e8655e36 100644 --- a/bookwyrm/static/css/themes/bookwyrm-dark.scss +++ b/bookwyrm/static/css/themes/bookwyrm-dark.scss @@ -98,6 +98,22 @@ $family-secondary: $family-sans-serif; } +.tabs li:not(.is-active) a { + color: #2e7eb9 !important; +} + .tabs li:not(.is-active) a:hover { + border-bottom-color: #2e7eb9 !important; +} + +.tabs li:not(.is-active) a { + color: #2e7eb9 !important; +} +.tabs li.is-active a { + color: #e6e6e6 !important; + border-bottom-color: #e6e6e6 !important ; +} + + #qrcode svg { background-color: #a6a6a6; } diff --git a/bookwyrm/static/css/themes/bookwyrm-light.scss b/bookwyrm/static/css/themes/bookwyrm-light.scss index efb13c23e4..bb7d340a9a 100644 --- a/bookwyrm/static/css/themes/bookwyrm-light.scss +++ b/bookwyrm/static/css/themes/bookwyrm-light.scss @@ -65,6 +65,22 @@ $family-secondary: $family-sans-serif; color: $grey !important; } +.tabs li:not(.is-active) a { + color: #3273dc !important; +} + .tabs li:not(.is-active) a:hover { + border-bottom-color: #3273dc !important; +} + +.tabs li:not(.is-active) a { + color: #3273dc !important; +} +.tabs li.is-active a { + color: #4a4a4a !important; + border-bottom-color: #4a4a4a !important ; +} + + @import "../bookwyrm.scss"; @import "../vendor/icons.css"; @import "../vendor/shepherd.scss"; diff --git a/bookwyrm/static/js/bookwyrm.js b/bookwyrm/static/js/bookwyrm.js index 6a6c0217fd..ceed12eba7 100644 --- a/bookwyrm/static/js/bookwyrm.js +++ b/bookwyrm/static/js/bookwyrm.js @@ -5,7 +5,7 @@ let BookWyrm = new (class { constructor() { this.MAX_FILE_SIZE_BYTES = 10 * 1000000; this.initOnDOMLoaded(); - this.initReccuringTasks(); + this.initRecurringTasks(); this.initEventListeners(); } @@ -77,7 +77,7 @@ let BookWyrm = new (class { /** * Execute recurring tasks. */ - initReccuringTasks() { + initRecurringTasks() { // Polling document.querySelectorAll("[data-poll]").forEach((liveArea) => this.polling(liveArea)); } diff --git a/bookwyrm/static/js/forms.js b/bookwyrm/static/js/forms.js index a48675b350..08066f137c 100644 --- a/bookwyrm/static/js/forms.js +++ b/bookwyrm/static/js/forms.js @@ -2,7 +2,7 @@ "use strict"; /** - * Remoev input field + * Remove input field * * @param {event} the button click event */ diff --git a/bookwyrm/storage_backends.py b/bookwyrm/storage_backends.py index 4fb0feff03..6dd9f522cd 100644 --- a/bookwyrm/storage_backends.py +++ b/bookwyrm/storage_backends.py @@ -2,6 +2,7 @@ import os from tempfile import SpooledTemporaryFile from storages.backends.s3boto3 import S3Boto3Storage +from storages.backends.azure_storage import AzureStorage class StaticStorage(S3Boto3Storage): # pylint: disable=abstract-method @@ -47,3 +48,16 @@ def _save(self, name, content): # Upload the object which will auto close the # content_autoclose instance return super()._save(name, content_autoclose) + + +class AzureStaticStorage(AzureStorage): # pylint: disable=abstract-method + """Storage class for Static contents""" + + location = "static" + + +class AzureImagesStorage(AzureStorage): # pylint: disable=abstract-method + """Storage class for Image files""" + + location = "images" + overwrite_files = False diff --git a/bookwyrm/suggested_users.py b/bookwyrm/suggested_users.py index ea6b1c55db..05e05891c8 100644 --- a/bookwyrm/suggested_users.py +++ b/bookwyrm/suggested_users.py @@ -4,13 +4,16 @@ from django.dispatch import receiver from django.db import transaction from django.db.models import signals, Count, Q, Case, When, IntegerField +from opentelemetry import trace from bookwyrm import models from bookwyrm.redis_store import RedisStore, r from bookwyrm.tasks import app, LOW, MEDIUM +from bookwyrm.telemetry import open_telemetry logger = logging.getLogger(__name__) +tracer = open_telemetry.tracer() class SuggestedUsers(RedisStore): @@ -49,30 +52,34 @@ def get_objects_for_store(self, store): ) def get_stores_for_object(self, obj): + """the stores that an object belongs in""" return [self.store_id(u) for u in self.get_users_for_object(obj)] def get_users_for_object(self, obj): # pylint: disable=no-self-use """given a user, who might want to follow them""" - return models.User.objects.filter(local=True,).exclude( + return models.User.objects.filter(local=True, is_active=True).exclude( Q(id=obj.id) | Q(followers=obj) | Q(id__in=obj.blocks.all()) | Q(blocks=obj) ) + @tracer.start_as_current_span("SuggestedUsers.rerank_obj") def rerank_obj(self, obj, update_only=True): """update all the instances of this user with new ranks""" + trace.get_current_span().set_attribute("update_only", update_only) pipeline = r.pipeline() for store_user in self.get_users_for_object(obj): - annotated_user = get_annotated_users( - store_user, - id=obj.id, - ).first() - if not annotated_user: - continue - - pipeline.zadd( - self.store_id(store_user), - self.get_value(annotated_user), - xx=update_only, - ) + with tracer.start_as_current_span("SuggestedUsers.rerank_obj/user") as _: + annotated_user = get_annotated_users( + store_user, + id=obj.id, + ).first() + if not annotated_user: + continue + + pipeline.zadd( + self.store_id(store_user), + self.get_value(annotated_user), + xx=update_only, + ) pipeline.execute() def rerank_user_suggestions(self, user): @@ -237,41 +244,45 @@ def domain_level_update(sender, instance, created, update_fields=None, **kwargs) # ------------------- TASKS -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def rerank_suggestions_task(user_id): """do the hard work in celery""" suggested_users.rerank_user_suggestions(user_id) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def rerank_user_task(user_id, update_only=False): """do the hard work in celery""" user = models.User.objects.get(id=user_id) suggested_users.rerank_obj(user, update_only=update_only) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def remove_user_task(user_id): """do the hard work in celery""" user = models.User.objects.get(id=user_id) - suggested_users.remove_object_from_related_stores(user) + suggested_users.remove_object_from_stores( + user, suggested_users.get_stores_for_object(user) + ) -@app.task(queue=MEDIUM, ignore_result=True) +@app.task(queue=MEDIUM) def remove_suggestion_task(user_id, suggested_user_id): """remove a specific user from a specific user's suggestions""" suggested_user = models.User.objects.get(id=suggested_user_id) suggested_users.remove_suggestion(user_id, suggested_user) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def bulk_remove_instance_task(instance_id): """remove a bunch of users from recs""" for user in models.User.objects.filter(federated_server__id=instance_id): - suggested_users.remove_object_from_related_stores(user) + suggested_users.remove_object_from_stores( + user, suggested_users.get_stores_for_object(user) + ) -@app.task(queue=LOW, ignore_result=True) +@app.task(queue=LOW) def bulk_add_instance_task(instance_id): """remove a bunch of users from recs""" for user in models.User.objects.filter(federated_server__id=instance_id): diff --git a/bookwyrm/telemetry/open_telemetry.py b/bookwyrm/telemetry/open_telemetry.py index 0b38a04b19..00b24d4b0a 100644 --- a/bookwyrm/telemetry/open_telemetry.py +++ b/bookwyrm/telemetry/open_telemetry.py @@ -1,10 +1,19 @@ from opentelemetry import trace from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter from opentelemetry.sdk.trace import TracerProvider -from opentelemetry.sdk.trace.export import BatchSpanProcessor +from opentelemetry.sdk.trace.export import BatchSpanProcessor, ConsoleSpanExporter + +from bookwyrm import settings trace.set_tracer_provider(TracerProvider()) -trace.get_tracer_provider().add_span_processor(BatchSpanProcessor(OTLPSpanExporter())) +if settings.OTEL_EXPORTER_CONSOLE: + trace.get_tracer_provider().add_span_processor( + BatchSpanProcessor(ConsoleSpanExporter()) + ) +elif settings.OTEL_EXPORTER_OTLP_ENDPOINT: + trace.get_tracer_provider().add_span_processor( + BatchSpanProcessor(OTLPSpanExporter()) + ) def instrumentDjango(): @@ -13,6 +22,12 @@ def instrumentDjango(): DjangoInstrumentor().instrument() +def instrumentPostgres(): + from opentelemetry.instrumentation.psycopg2 import Psycopg2Instrumentor + + Psycopg2Instrumentor().instrument() + + def instrumentCelery(): from opentelemetry.instrumentation.celery import CeleryInstrumentor from celery.signals import worker_process_init @@ -20,3 +35,7 @@ def instrumentCelery(): @worker_process_init.connect(weak=False) def init_celery_tracing(*args, **kwargs): CeleryInstrumentor().instrument() + + +def tracer(): + return trace.get_tracer(__name__) diff --git a/bookwyrm/templates/book/book.html b/bookwyrm/templates/book/book.html index e9eff99ab3..e24f81d79a 100644 --- a/bookwyrm/templates/book/book.html +++ b/bookwyrm/templates/book/book.html @@ -4,6 +4,7 @@ {% load humanize %} {% load utilities %} {% load static %} +{% load shelf_tags %} {% block title %}{{ book|book_title }}{% endblock %} @@ -46,7 +47,13 @@

- ({{ book.series }}{% if book.series_number %} #{{ book.series_number }}{% endif %}) + {% if book.authors.exists %} + + {% endif %} + {{ book.series }}{% if book.series_number %} #{{ book.series_number }}{% endif %} + {% if book.authors.exists %} + + {% endif %} {% endif %}

{% endif %} @@ -239,7 +246,7 @@