diff --git a/app/services/activitypub/fetch_all_replies_service.rb b/app/services/activitypub/fetch_all_replies_service.rb index 19fae824dff495..cba476ab46c30c 100644 --- a/app/services/activitypub/fetch_all_replies_service.rb +++ b/app/services/activitypub/fetch_all_replies_service.rb @@ -10,7 +10,7 @@ def call(collection_or_uri, allow_synchronous_requests: true, request_id: nil) @allow_synchronous_requests = allow_synchronous_requests @filter_by_host = false - @items = collection_items(collection_or_uri) + @items = collection_items(collection_or_uri, fetch_all: true) @items = filtered_replies return if @items.nil? @@ -25,6 +25,8 @@ def filtered_replies return if @items.nil? # find all statuses that we *shouldn't* update the replies for, and use that as a filter + # Typically we assume this is smaller than the replies we *should* fetch, + # so we minimize the number of uris we should load here. uris = @items.map { |item| value_or_id(item) } dont_update = Status.where(uri: uris).shouldnt_fetch_replies.pluck(:uri) diff --git a/app/services/activitypub/fetch_replies_service.rb b/app/services/activitypub/fetch_replies_service.rb index 0dcdb638e1339e..2963f616e2882b 100644 --- a/app/services/activitypub/fetch_replies_service.rb +++ b/app/services/activitypub/fetch_replies_service.rb @@ -5,6 +5,8 @@ class ActivityPub::FetchRepliesService < BaseService # Limit of fetched replies MAX_REPLIES = 5 + # Limit when fetching all (to prevent infinite fetch attack) + FETCH_ALL_MAX_REPLIES = 500 def call(parent_status, collection_or_uri, allow_synchronous_requests: true, request_id: nil, filter_by_host: true) @account = parent_status.account @@ -21,7 +23,7 @@ def call(parent_status, collection_or_uri, allow_synchronous_requests: true, req private - def collection_items(collection_or_uri) + def collection_items(collection_or_uri, fetch_all: false) collection = fetch_collection(collection_or_uri) return unless collection.is_a?(Hash) @@ -39,8 +41,8 @@ def collection_items(collection_or_uri) all_items.concat(as_array(items)) - # Quit early if we are not fetching all replies - break if all_items.size >= MAX_REPLIES + # Quit early if we are not fetching all replies or we've reached the absolute max + break if (!fetch_all && all_items.size >= MAX_REPLIES) || (all_items.size >= FETCH_ALL_MAX_REPLIES) collection = collection['next'].present? ? fetch_collection(collection['next']) : nil end diff --git a/app/workers/activitypub/fetch_all_replies_worker.rb b/app/workers/activitypub/fetch_all_replies_worker.rb index af2cf40e91ca1d..222736224b2439 100644 --- a/app/workers/activitypub/fetch_all_replies_worker.rb +++ b/app/workers/activitypub/fetch_all_replies_worker.rb @@ -11,7 +11,7 @@ class ActivityPub::FetchAllRepliesWorker sidekiq_options queue: 'pull', retry: 3 - # Global max replies to fetch per request + # Global max replies to fetch per request (all replies, recursively) MAX_REPLIES = 1000 def perform(parent_status_id, current_account_id = nil, options = {}) @@ -20,21 +20,21 @@ def perform(parent_status_id, current_account_id = nil, options = {}) @current_account = @current_account_id.nil? ? nil : Account.find(@current_account_id) Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: Fetching all replies for status: #{@parent_status}" } - all_replies = get_replies(@parent_status.uri, options) - got_replies = all_replies.length - until all_replies.empty? || got_replies >= MAX_REPLIES - next_reply = all_replies.pop + uris_to_fetch = get_replies(@parent_status.uri, options) + fetched_uris = uris_to_fetch + until uris_to_fetch.empty? || fetched_uris.length >= MAX_REPLIES + next_reply = uris_to_fetch.pop next if next_reply.nil? - new_replies = get_replies(next_reply, options) - next if new_replies.nil? + new_reply_uris = get_replies(next_reply, options) + next if new_reply_uris.nil? - got_replies += new_replies.length - all_replies.concat(new_replies) + uris_to_fetch.concat(new_reply_uris) + fetched_uris.concat(new_reply_uris) end - Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{got_replies} replies" } - got_replies + Rails.logger.debug { "FetchAllRepliesWorker - #{parent_status_id}: fetched #{fetched_uris.length} replies" } + fetched_uris end private