Skip to content

Commit

Permalink
Rake: add customized allmaps harvest task
Browse files Browse the repository at this point in the history
This uses our geomg_id_s as the sort field for cursor mark
  • Loading branch information
ewlarson committed Apr 18, 2024
1 parent 5bafdcc commit e851a6e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 1 deletion.
2 changes: 1 addition & 1 deletion config/schedule.rb
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
# Blacklight::Allmaps
# Harvest Maps
# Daily Allmaps harvest, scheduled for 3:30am on servers with the :app role.
every :day, at: '3:30am', roles: [:app] do
  rake 'blacklight_allmaps:sidecars:harvest:allmaps'
  # The `rake` job type already supplies the rake command, so only the task
  # name is passed here; a leading "rake " would be treated as a task name.
  rake 'geoportal:allmaps:harvest'
end

# Populate the Georeferenced Facet
Expand Down
28 changes: 28 additions & 0 deletions lib/tasks/geoportal.rake
Original file line number Diff line number Diff line change
Expand Up @@ -92,4 +92,32 @@ namespace :geoportal do
Blacklight.default_index.connection.commit
end
end

# @CUSTOMIZATION
# Local variant of the Allmaps sidecar harvest that pages through Solr
# sorted on geomg_id_s.
namespace :allmaps do
  desc "Sidecars - Harvest: Crawl SolrDocuments to store Allmaps data locally"
  task harvest: [:environment] do
    # "*" is the cursor mark that starts paging from the beginning.
    current_mark = "*"

    loop do
      query = {
        q: "*:*", # match every document
        fl: "id", # only the id field is needed
        cursorMark: current_mark, # resume from the previous page
        rows: 1000,
        # NOTE(review): Solr cursor paging needs a sort that includes the
        # uniqueKey field; geomg_id_s is presumably that key here — confirm
        # against the Solr schema.
        sort: "geomg_id_s asc"
      }
      page = Blacklight.default_index.connection.get("select", params: query)

      # Queue one sidecar annotation job per document on this page.
      page["response"]["docs"].each do |solr_doc|
        doc_id = solr_doc["id"]
        puts "Harvesting Allmaps data for #{doc_id}"
        Blacklight::Allmaps::StoreSidecarAnnotation.perform_later(doc_id)
      end

      # An unchanged cursor mark means the result set is finished.
      upcoming_mark = page["nextCursorMark"]
      break if upcoming_mark == current_mark

      current_mark = upcoming_mark
    end
  end
end
end

0 comments on commit e851a6e

Please sign in to comment.