diff --git a/app/models/searchgov_url.rb b/app/models/searchgov_url.rb
index 5b1e2e0903..3526952dfe 100644
--- a/app/models/searchgov_url.rb
+++ b/app/models/searchgov_url.rb
@@ -42,7 +42,12 @@ class SearchgovUrl < ActiveRecord::Base
+  # URLs due for a fetch: last_crawled_at is NULL, lastmod is newer than
+  # last_crawled_at, enqueued_for_reindex is set, or the last crawl status
+  # was OK more than a month ago. Ordered by last_crawled_at, ascending.
   scope :fetch_required, -> do
     where('last_crawled_at IS NULL
            OR lastmod > last_crawled_at
-           OR enqueued_for_reindex')
+           OR enqueued_for_reindex
+           OR (last_crawl_status = ? AND last_crawled_at < ?)', 'OK', 1.month.ago).
+      order(last_crawled_at: :asc)
   end
 
   class SearchgovUrlError < StandardError; end
diff --git a/spec/fixtures/searchgov_urls.yml b/spec/fixtures/searchgov_urls.yml
index fa0fef99d0..3ce3662890 100644
--- a/spec/fixtures/searchgov_urls.yml
+++ b/spec/fixtures/searchgov_urls.yml
@@ -16,3 +16,8 @@ enqueued:
   last_crawled_at: <%= 1.day.ago.to_s(:db) %>
   lastmod: <%= 1.week.ago.to_s(:db) %>
   enqueued_for_reindex: true
+
+crawled_more_than_month:
+  url: http://www.agency.gov/crawled_more_than_month
+  last_crawled_at: <%= 45.days.ago.to_s(:db) %>
+  last_crawl_status: 'OK'
diff --git a/spec/models/searchgov_url_spec.rb b/spec/models/searchgov_url_spec.rb
index 5a3213571d..c8212b2c68 100644
--- a/spec/models/searchgov_url_spec.rb
+++ b/spec/models/searchgov_url_spec.rb
@@ -39,6 +39,11 @@
         expect(SearchgovUrl.fetch_required.pluck(:url)).
           to include 'http://www.agency.gov/enqueued'
       end
+
+      it 'includes urls successfully crawled more than a month ago' do
+        expect(SearchgovUrl.fetch_required.pluck(:url)).
+          to include 'http://www.agency.gov/crawled_more_than_month'
+      end
     end
   end
 
diff --git a/spec/support/fetchable_behavior.rb b/spec/support/fetchable_behavior.rb
index d821684c7b..1f5fe4bc99 100644
--- a/spec/support/fetchable_behavior.rb
+++ b/spec/support/fetchable_behavior.rb
@@ -68,7 +68,7 @@
     describe '.ok' do
       it 'includes successfully fetched records' do
         expect(described_class.ok.pluck(:url)).
-          to match_array ['http://agency.gov/ok']
+          to include 'http://agency.gov/ok'
       end
     end
 