From cba2dd400e6f021f2c2e8822718e78eef1a99509 Mon Sep 17 00:00:00 2001
From: Eric Larson
Date: Mon, 6 Nov 2023 09:30:24 -0600
Subject: [PATCH] Image Harvest: skip :failed and :placeheld states

We don't need to attempt image harvesting for placeheld or failed items.

Addresses #465
---
Review notes (this section is not part of the commit message):
- queue_incomplete_states re-queues StoreImageJob for sidecars in the
  :initialized, :queued and :processing states only.
- The rescue now names StandardError and prints the error class and
  message, so a lookup or enqueue failure is not reported as a plain
  "orphaned" sidecar. The loop still continues with the next sidecar.
- The index line below still carries the blob ids of the original commit.

 config/schedule.rb                        |  2 +-
 lib/tasks/geoportal/geoportal_images.rake | 20 +++++++++++++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/config/schedule.rb b/config/schedule.rb
index 059ad6789..491ff5492 100644
--- a/config/schedule.rb
+++ b/config/schedule.rb
@@ -11,7 +11,7 @@ end
 
 # Harvest thumbnail images for search results
 every :day, at: '12:05am', roles: [:app] do
-  rake 'gblsci:images:harvest_retry'
+  rake 'geoportal:queue_incomplete_states'
 end
 every :day, at: '12:30am', roles: [:app] do
   rake 'sitemap:refresh'
diff --git a/lib/tasks/geoportal/geoportal_images.rake b/lib/tasks/geoportal/geoportal_images.rake
index fc9843aa9..ea823b40f 100644
--- a/lib/tasks/geoportal/geoportal_images.rake
+++ b/lib/tasks/geoportal/geoportal_images.rake
@@ -27,7 +27,25 @@ namespace :geoportal do
 
   desc 'Harvest images - Queue incomplete states for reprocessing'
   task queue_incomplete_states: :environment do
-    puts "Deprecated / Instead try: bundle exec rake gblsci:images:harvest_retry"
+    # Re-queue image harvesting for the states listed here only.
+    # Skipping :failed, :placeheld
+    states = %i[initialized queued processing]
+
+    states.each do |state|
+      sidecars = SolrDocumentSidecar.in_state(state)
+
+      puts "#{state} - #{sidecars.size}"
+
+      sidecars.each do |sc|
+        # Look the document up first; only its id is passed to the job.
+        document = Geoblacklight::SolrDocument.find(sc.document_id)
+        GeoblacklightSidecarImages::StoreImageJob.perform_later(document.id)
+      rescue StandardError => e
+        # Best-effort: report and move on. The error class and message are
+        # printed so other failures are not mistaken for orphaned sidecars.
+        puts "orphaned / #{sc.document_id} (#{e.class}: #{e.message})"
+      end
+    end
   end
 
   desc 'Failed State - Inspect metadata'