From 7a7e7bbefdc3169b6e7cc4aa198ca6601ed0ee71 Mon Sep 17 00:00:00 2001
From: peggles2
Date: Wed, 19 Dec 2018 11:27:23 -0500
Subject: [PATCH] Feature/automate reindexing (#206)

* SRCH-126 - Automate reindexing

Extend SearchgovUrl.fetch_required so that it also returns URLs whose
last crawl status is OK and whose last crawl is more than one month old,
and order the results by last_crawled_at ascending. The status value is
passed as a bind parameter rather than a double-quoted SQL literal,
because double quotes denote identifiers in standard SQL and in MySQL's
ANSI_QUOTES mode.

Add a fixture and a spec for the new condition, and relax the shared
`.ok` example from `match_array` to `include` now that another fixture
carries an OK crawl status.
---
 app/models/searchgov_url.rb        | 4 +++-
 spec/fixtures/searchgov_urls.yml   | 5 +++++
 spec/models/searchgov_url_spec.rb  | 5 +++++
 spec/support/fetchable_behavior.rb | 2 +-
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/app/models/searchgov_url.rb b/app/models/searchgov_url.rb
index 5b1e2e0903..3526952dfe 100644
--- a/app/models/searchgov_url.rb
+++ b/app/models/searchgov_url.rb
@@ -42,7 +42,9 @@ class SearchgovUrl < ActiveRecord::Base
   scope :fetch_required, -> do
     where('last_crawled_at IS NULL
            OR lastmod > last_crawled_at
-           OR enqueued_for_reindex')
+           OR enqueued_for_reindex
+           OR (last_crawl_status = ? AND last_crawled_at < ?)', 'OK', 1.month.ago).
+      order(last_crawled_at: :asc)
   end
 
   class SearchgovUrlError < StandardError; end
diff --git a/spec/fixtures/searchgov_urls.yml b/spec/fixtures/searchgov_urls.yml
index fa0fef99d0..3ce3662890 100644
--- a/spec/fixtures/searchgov_urls.yml
+++ b/spec/fixtures/searchgov_urls.yml
@@ -16,3 +16,8 @@ enqueued:
   last_crawled_at: <%= 1.day.ago.to_s(:db) %>
   lastmod: <%= 1.week.ago.to_s(:db) %>
   enqueued_for_reindex: true
+
+crawled_more_than_month:
+  url: http://www.agency.gov/crawled_more_than_month
+  last_crawled_at: <%= 45.days.ago.to_s(:db) %>
+  last_crawl_status: 'OK'
\ No newline at end of file
diff --git a/spec/models/searchgov_url_spec.rb b/spec/models/searchgov_url_spec.rb
index 5a3213571d..c8212b2c68 100644
--- a/spec/models/searchgov_url_spec.rb
+++ b/spec/models/searchgov_url_spec.rb
@@ -39,6 +39,11 @@
         expect(SearchgovUrl.fetch_required.pluck(:url)).
           to include 'http://www.agency.gov/enqueued'
       end
+
+      it 'includes urls last crawled more than 30 days and crawl status is ok' do
+        expect(SearchgovUrl.fetch_required.pluck(:url)).
+          to include 'http://www.agency.gov/crawled_more_than_month'
+      end
     end
   end
 
diff --git a/spec/support/fetchable_behavior.rb b/spec/support/fetchable_behavior.rb
index d821684c7b..1f5fe4bc99 100644
--- a/spec/support/fetchable_behavior.rb
+++ b/spec/support/fetchable_behavior.rb
@@ -68,7 +68,7 @@
     describe '.ok' do
       it 'includes successfully fetched records' do
         expect(described_class.ok.pluck(:url)).
-          to match_array ['http://agency.gov/ok']
+          to include 'http://agency.gov/ok'
       end
     end
 