Skip to content

Commit

Permalink
SRCH-202 - scrub job keywords from query (#187)
Browse files Browse the repository at this point in the history
* SRCH-179 - Add a boolean column to the searchgov_urls

* Feature/bool db column urls (#172)

* update structure.sql

* migration file for enqueued for reindex to searchgov_url

* update migration, update test cases

* migrated database

* SRCH-202 - scrub job keywords from query

* update yml to get key

* update jobs.rb to have options in the jobs module

* change job_scrub to scrub_keyword, squish instead of strip, delete instead of gsub

* update jobs_spec

* remove scrub_keyword

* changes made to tighten up the regex

* fix word boundaries, update tests
  • Loading branch information
peggles2 authored Dec 4, 2018
1 parent 731dd7a commit 4970a01
Show file tree
Hide file tree
Showing 7 changed files with 11,365 additions and 25 deletions.
16 changes: 7 additions & 9 deletions app/models/govbox_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -104,22 +104,20 @@ def init_news_items

# Populates @jobs and registers the 'JOBS' module when the affiliate has jobs
# enabled and the jobs search returns at least one result item.
def init_jobs
if @affiliate.jobs_enabled?
# NOTE(review): this page is a rendered diff without +/- markers; the next line
# looks like the pre-change call that the multi-line Jobs.search below replaces — confirm.
job_results = Jobs.search(build_jobs_search_options)&.search_result&.search_result_items
# Pass the raw query plus the optional agency organization codes and GeoIP
# location name; safe navigation leaves those values nil when the agency or
# the GeoIP info is missing, and also tolerates a nil search response.
job_results = Jobs.search({
query: @query,
organization_codes: @affiliate.agency&.joined_organization_codes,
location_name: @geoip_info&.location_name,
results_per_page: 10
})&.search_result&.search_result_items

if job_results.present?
# Run the raw result items through JobResultsPostProcessor before exposing them.
@jobs = JobResultsPostProcessor.new(results: job_results)&.post_processed_results
@modules << 'JOBS'
end
end
end

# Builds the option hash handed to Jobs.search.
#
# Always contains :Keyword (the current query) and :ResultsPerPage (10).
# :Organization and :LocationName are included only when the affiliate's
# agency / the GeoIP info yield a non-nil value, because nil entries are
# dropped by the final compact.
#
# @return [Hash] search options without nil values
def build_jobs_search_options
  {
    Keyword: @query,
    ResultsPerPage: 10,
    Organization: @affiliate.agency&.joined_organization_codes,
    LocationName: @geoip_info&.location_name
  }.compact
end

def init_federal_register_documents
if @affiliate.is_federal_register_document_govbox_enabled? &&
@affiliate.agency && @affiliate.agency.federal_register_agency.present?
Expand Down
832 changes: 832 additions & 0 deletions features/vcr_cassettes/Searches_using_mobile_device/Job_search.yml

Large diffs are not rendered by default.

20 changes: 17 additions & 3 deletions lib/jobs.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
module Jobs
# Regex source matching "job", "employment" or "internship", optionally followed by "s".
SIMPLE_SEARCHES = '(job|employment|internship)s?'
# Regex source for English and Spanish job-related terms. query_eligible?
# interpolates it between \b anchors (case-insensitively) to decide whether a
# query may trigger a jobs search.
# NOTE(review): this page is a rendered diff without +/- markers, so the
# assignment appears twice. The first line looks like the pre-change value, in
# which the "es" suffix of the last group is mandatory; the second makes that
# suffix optional so the singular forms (e.g. "federal") match as well — confirm.
JOB_RELATED_KEYWORDS = '((position|opening|posting|job|employment|intern(ship)?|seasonal|trabajo|puesto|empleo|vacante)s?|(opportunit|vacanc)(y|ies))|(posicion|ocupacion|oportunidad|federal)es|gobierno'
JOB_RELATED_KEYWORDS = '((position|opening|posting|job|employment|intern(ship)?|seasonal|trabajo|puesto|empleo|vacante)s?|(opportunit|vacanc)(y|ies))|(posicion|ocupacion|oportunidad|federal)(es)?|gobierno'
# Regex source for the generic job keywords that scrub_query strips from a query.
# Derived from JOB_RELATED_KEYWORDS minus the terms named in the removal pattern
# (intern/internship, seasonal, federal, gobierno), which therefore stay in the
# scrubbed query. Each removal alternative consumes the whole term together with
# its leading "|":
#   * "\|intern\(ship\)\?" also takes the trailing "?", which would otherwise
#     attach to the preceding term and turn "employment" into "employment?";
#   * "\|federal" has no trailing "\|" because "federal" is followed by ")" in
#     JOB_RELATED_KEYWORDS, so the old "\|federal\|" never matched;
#   * "\|gobierno" takes its separator so no empty trailing alternative remains.
SCRUB_KEYWORDS = JOB_RELATED_KEYWORDS.remove(/\|intern\(ship\)\?|\|seasonal|\|federal|\|gobierno/)
SIMPLE_SINGULARS = %w{
statistic number level rate description trend growth projection survey forecast figure report verification record
authorization card classification form hazard poster fair board outlook grant funding factor other cut
Expand All @@ -27,8 +28,14 @@ def self.establish_connection!
end
end

def self.search(options)
@usajobs_api_connection.get(@endpoint, options).body if query_eligible?(options[:Keyword])
# Removes generic job keywords (SCRUB_KEYWORDS) from a search query,
# case-insensitively, and collapses the leftover whitespace.
#
# @param query [String] the raw search query
# @return [String] the query without generic job keywords; empty when the
#   query consisted only of such keywords
def self.scrub_query(query)
  # The alternation must be wrapped in a group: without it the \b anchors bind
  # only to the outermost alternatives, so keywords were also stripped from the
  # start or middle of longer words (e.g. "jobless" became "less").
  query.remove(/\b(?:#{SCRUB_KEYWORDS})\b/i).squish
end

def self.search(job_options)
if query_eligible?(job_options[:query])
@usajobs_api_connection.get(@endpoint, params(job_options)).body
end
rescue => error
Rails.logger.error("Trouble fetching jobs information: #{error}")
nil
Expand All @@ -38,6 +45,13 @@ def self.query_eligible?(query)
query =~ /\b#{JOB_RELATED_KEYWORDS}\b/i && !(query =~ /\b#{BLOCKED_KEYWORDS}\b/i) && !(query =~ /["():]|^-| -\S+/)
end

# Translates the application-level search options into the query parameters
# sent to the USAJOBS endpoint.
#
# @param options [Hash] accepts :query, :organization_codes, :location_name
#   and :results_per_page
# @return [Hash] request parameters (:Keyword, :Organization, :LocationName,
#   :ResultsPerPage) with the job keywords scrubbed from the query
def self.params(options)
  {
    Keyword: scrub_query(options[:query]),
    Organization: options[:organization_codes],
    LocationName: options[:location_name],
    ResultsPerPage: options[:results_per_page]
    # Drop parameters the caller left nil (e.g. no agency or no GeoIP info) so
    # they are not sent as empty filters; the options builder this method
    # replaced compacted its hash in the same way.
  }.compact
end

end

Jobs.establish_connection!
58 changes: 54 additions & 4 deletions spec/lib/jobs_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,25 @@

describe Jobs do
describe '.search(options)' do
subject(:search) { Jobs.search(Keyword: 'jobs') }

subject(:search) do
Jobs.search({ query:'Nursing jobs',
organization_codes: 'HE38',
location_name: 'Washington, DC, USA',
results_per_page: 10 })
end
let(:usajobs_url) { 'https://data.usajobs.gov/api/search' }
it 'returns results' do
expect(search.search_result.search_result_count).to eq 10
expect(search.search_result.search_result_count).to be > 0
end

it 'searches USAJOBS with the correct params' do
search
expect(a_request(:get, usajobs_url).with(
query: { Keyword: 'Nursing',
Organization: 'HE38',
LocationName: 'Washington, DC, USA',
ResultsPerPage: 10 }
)).to have_been_made
end

context "when there is some problem" do
Expand All @@ -21,8 +36,43 @@
end
end

# Examples for Jobs.scrub_query: generic job keywords are stripped from the
# query, while more specific job-related terms (e.g. "internship") are kept.
describe '.scrub_query(query)' do
  context 'when the search phrase contains a job related keyword' do
    it 'returns the query without generic job keywords' do
      expect(Jobs.scrub_query('Nursing jobs')).to eq('Nursing')
    end

    it 'returns blank when the query is a generic job keyword' do
      %w[ position opening posting job employment trabajo puesto empleo
          vacante opportunity vacancy posicion ocupacion oportunidad].each do |query|
        expect(Jobs.scrub_query(query)).to eq('')
      end
    end

    it 'returns job related keyword if the query is the same, and not a generic job keyword.' do
      expect(Jobs.scrub_query('internship')).to eq('internship')
    end

    it 'returns blank when the query only contains generic job keywords.' do
      expect(Jobs.scrub_query('job posting')).to eq('')
    end

    it 'returns the job related keyword when the query is a job related keyword and generic job keyword' do
      expect(Jobs.scrub_query('internship job')).to eq('internship')
    end

    # "reopening" merely contains the keyword "opening", so it must be left alone.
    it 'does not scrub a job related keyword that is part of another word' do
      expect(Jobs.scrub_query('grand reopening')).to eq('grand reopening')
    end

    # Mixed-case input is scrubbed just like lower-case input.
    it 'is case insensitive when scrubbing queries' do
      expect(Jobs.scrub_query('JoB')).to eq('')
    end
  end
end

describe '.query_eligible?(query)' do
context "when the search phrase contains hyphenated words" do
context 'when the search phrase contains hyphenated words' do
it 'should return true' do
expect(Jobs.query_eligible?('full-time jobs')).to be true
expect(Jobs.query_eligible?('intern')).to be true
Expand Down
16 changes: 9 additions & 7 deletions spec/models/govbox_set_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -161,8 +161,7 @@
end

it "returns job results" do
expect(govbox_set.jobs.first.position_title).to eq('Therapy Assistant')
expect(govbox_set.jobs.first.locations).to eq(['Lyons, New Jersey'])
expect(govbox_set.jobs.count).to be > 0
end

context "when the affiliate has a related agency with an org code" do
Expand All @@ -172,17 +171,20 @@

it "should call Jobs.search with the query, org codes, results per page, and location_name params" do
expect(Jobs).to receive(:search).
with(Keyword: 'job',
ResultsPerPage: 10,
Organization: 'ABCD;BCDE',
LocationName: 'Flemington, New Jersey, United States')
with(query: 'job',
organization_codes: 'ABCD;BCDE',
results_per_page: 10,
location_name: 'Flemington, New Jersey, United States')
govbox_set = GovboxSet.new('job', affiliate, geoip_info)
end
end

context "when the affiliate does not have a related agency with an org code" do
it 'calls Jobs.search with just the query, results per page' do
expect(Jobs).to receive(:search).with(Keyword: 'job', ResultsPerPage: 10).and_return nil
expect(Jobs).to receive(:search).with(query: 'job',
organization_codes: nil,
results_per_page: 10,
location_name: nil).and_return nil
GovboxSet.new('job', affiliate, nil)
end
end
Expand Down
Loading

0 comments on commit 4970a01

Please sign in to comment.