Skip to content

Commit

Permalink
SRCH-1514 update gems (#589)
Browse files Browse the repository at this point in the history
* Bump websocket-extensions from 0.1.4 to 0.1.5 (#585)
* Bump puma from 3.12.4 to 3.12.6 (#581)
* bump webmock from 3.1.1 to 3.8.3
* remove web-console gem (#598)
* Bump rack from 2.2.2 to 2.2.3
* remove sanitize gem (#600)
- convert QuerySanitizer to Sanitizer
- replace deprecated .success? matcher with .successful? in specs
- add missing unit tests
- remove unsafe HTML tags when sanitizing
  • Loading branch information
MothOnMars authored Jul 16, 2020
1 parent 650878b commit b9d85f1
Show file tree
Hide file tree
Showing 27 changed files with 226 additions and 90 deletions.
6 changes: 2 additions & 4 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ gem 'resque-scheduler', '~> 4.3.1'
gem 'cocaine', '~> 0.5.8'
gem 'paperclip', '~> 5.2.0'
gem 'googlecharts', '~> 1.6.12'
gem 'sanitize', '~> 4.6.4'
gem 'tweetstream', '~> 2.6.1' # no longer maintained?
gem 'twitter', '~> 5.5'
gem 'flickraw', '~> 0.9.9'
Expand Down Expand Up @@ -86,7 +85,7 @@ gem 'dogstatsd-ruby', '~> 3.2.0'
gem 'test-unit', '~> 3.2.7'
gem 'http', '~> 1.0'
gem 'robots_tag_parser', '~> 0.1.0', git: 'https://github.com/GSA/robots_tag_parser'
gem 'loofah', '~> 2.3.1'
gem 'loofah', '~> 2.6.0'
# Using custom branch until https://github.com/brutuscat/medusa/issues/10 is merged
gem 'medusa', git: 'https://github.com/MothOnMars/medusa', branch: 'clean_urls'
# Robotex is required by Medusa. Specifying fork until https://github.com/chriskite/robotex/issues/4
Expand Down Expand Up @@ -129,7 +128,6 @@ gem 'font-awesome-grunticon-rails',
# put test-only gems in this group so their generators
# and rake tasks are available in development mode:
group :development do
gem 'web-console', '~> 2.0'
gem 'spring', '~> 2.0'
gem 'listen', '~> 3.1.5'
end
Expand Down Expand Up @@ -165,7 +163,7 @@ group :test do
gem 'shoulda-matchers', '~> 4.1.1'
gem 'shoulda-kept-assign-to', '~> 1.1.0'
gem 'vcr', '~> 4.0'
gem 'webmock', '~> 3.1.1'
gem 'webmock', '~> 3.8.3'
gem 'rspec-activemodel-mocks', '~> 1.0.3'
gem 'rspec_junit_formatter', '~> 0.3.0'
gem 'rails-controller-testing', '~> 1.0.4'
Expand Down
36 changes: 10 additions & 26 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,6 @@ GEM
bcrypt-ruby (3.1.5)
bcrypt (>= 3.1.3)
bindata (2.4.4)
binding_of_caller (0.8.0)
debug_inspector (>= 0.0.1)
buftok (0.2.0)
builder (3.2.4)
byebug (11.0.1)
Expand Down Expand Up @@ -271,7 +269,6 @@ GEM
curb (0.9.10)
daemons (1.3.1)
database_cleaner (1.7.0)
debug_inspector (0.0.3)
declarative (0.0.10)
declarative-option (0.1.0)
descendants_tracker (0.0.4)
Expand Down Expand Up @@ -390,7 +387,7 @@ GEM
haml (5.0.4)
temple (>= 0.8.0)
tilt
hashdiff (0.4.0)
hashdiff (1.0.1)
hashie (3.3.2)
highline (2.0.2)
html_truncator (0.4.2)
Expand Down Expand Up @@ -457,7 +454,7 @@ GEM
logging (2.2.2)
little-plugger (~> 1.1)
multi_json (~> 1.10)
loofah (2.3.1)
loofah (2.6.0)
crass (~> 1.0.2)
nokogiri (>= 1.5.9)
mail (2.7.1)
Expand Down Expand Up @@ -494,10 +491,8 @@ GEM
net-http-persistent (2.9.4)
newrelic_rpm (5.0.0.342)
nio4r (2.5.2)
nokogiri (1.10.9)
nokogiri (1.10.10)
mini_portile2 (~> 2.4.0)
nokogumbo (1.5.0)
nokogiri
oj (3.3.10)
omniauth (1.4.3)
hashie (>= 1.2, < 4)
Expand Down Expand Up @@ -531,9 +526,9 @@ GEM
pry-rails (0.3.9)
pry (>= 0.10.4)
public_suffix (3.1.1)
puma (3.12.4)
puma (3.12.6)
raabro (1.1.6)
rack (2.2.2)
rack (2.2.3)
rack-accept (0.4.5)
rack (>= 0.4)
rack-contrib (2.1.0)
Expand Down Expand Up @@ -678,10 +673,6 @@ GEM
rufus-scheduler (3.6.0)
fugit (~> 1.1, >= 1.1.6)
safe_yaml (1.0.5)
sanitize (4.6.6)
crass (~> 1.0.2)
nokogiri (>= 1.4.4)
nokogumbo (~> 1.4)
sass (3.3.14)
sass-rails (5.0.7)
railties (>= 4.0.0, < 6)
Expand Down Expand Up @@ -782,18 +773,13 @@ GEM
coercible (~> 1.0)
descendants_tracker (~> 0.0, >= 0.0.3)
equalizer (~> 0.0, >= 0.0.9)
web-console (2.3.0)
activemodel (>= 4.0)
binding_of_caller (>= 0.7.2)
railties (>= 4.0)
sprockets-rails (>= 2.0, < 4.0)
webmock (3.1.1)
webmock (3.8.3)
addressable (>= 2.3.6)
crack (>= 0.3.2)
hashdiff
hashdiff (>= 0.4.0, < 2.0.0)
websocket-driver (0.7.2)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.4)
websocket-extensions (0.1.5)
will_paginate (3.1.7)
will_paginate-bootstrap (1.0.2)
will_paginate (>= 3.0.3)
Expand Down Expand Up @@ -863,7 +849,7 @@ DEPENDENCIES
launchy (~> 2.4.3)
less-rails-bootstrap!
listen (~> 3.1.5)
loofah (~> 2.3.1)
loofah (~> 2.6.0)
medusa!
mobile-fu (~> 1.4.0)
mry (= 0.59.0.0)
Expand Down Expand Up @@ -907,7 +893,6 @@ DEPENDENCIES
rspec-rails (~> 3.8.2)
rspec_junit_formatter (~> 0.3.0)
rubocop (= 0.60.0)
sanitize (~> 4.6.4)
sass (~> 3.3.0)
sass-rails (~> 5.0.7)
saxerator (~> 0.9.9)
Expand All @@ -931,8 +916,7 @@ DEPENDENCIES
validate_url (= 0.2.0)
vcr (~> 4.0)
virtus (~> 1.0.5)
web-console (~> 2.0)
webmock (~> 3.1.1)
webmock (~> 3.8.3)
will_paginate (~> 3.1.6)
will_paginate-bootstrap (~> 1.0.1)
yajl-ruby (~> 1.3.1)
Expand Down
2 changes: 1 addition & 1 deletion app/controllers/application_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def query_search_options
end

def sanitize_query(query)
QuerySanitizer.sanitize(query)
Sanitizer.sanitize(query, encode: false)
end

def highlighting_option
Expand Down
2 changes: 2 additions & 0 deletions app/controllers/image_searches_controller.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

class ImageSearchesController < ApplicationController
include MobileFriendlyController

Expand Down
2 changes: 2 additions & 0 deletions app/models/boosted_content.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

class BoostedContent < ApplicationRecord
extend HumanAttributeName
include BestBet
Expand Down
7 changes: 5 additions & 2 deletions app/models/elastic_featured_collection_results.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,11 @@ def highlight_instance(highlight, instance)

def highlight_link_titles(highlighted_link_titles, instance)
highlighted_link_titles.each do |link_title|
fcl = instance.featured_collection_links.detect { |fcl| fcl.title == Sanitize.clean(link_title) }
fcl = instance.featured_collection_links.detect do |fcl|
fcl.title == Sanitizer.sanitize(link_title)
end

fcl.title = link_title if fcl
end
end
end
end
2 changes: 1 addition & 1 deletion app/models/indexed_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def index_application_file(file_path, doctype)
end

def extract_body_from(nokogiri_doc)
body = scrub_inner_text(Sanitize.clean(nokogiri_doc.at('body').inner_html.encode('utf-8'))) rescue ''
body = scrub_inner_text(Sanitizer.sanitize(nokogiri_doc.at('body').inner_html.encode('utf-8'))) rescue ''
raise IndexedDocumentError.new(EMPTY_BODY_STATUS) if body.blank?
body
end
Expand Down
6 changes: 4 additions & 2 deletions app/models/med_topic.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

class MedTopic < ApplicationRecord
MAX_MED_TOPIC_SUMMARY_LENGTH = 200
MEDLINE_BASE_URL = 'https://medlineplus.gov/'
Expand Down Expand Up @@ -148,8 +150,8 @@ def self.search_for(title, locale = 'en')
end

def truncated_summary
sentences = Sanitize.clean(summary_html).gsub(/[[:space:]]/, ' ').squish.split(/\.\s*/)
summary = ''
sentences = Sanitizer.sanitize(summary_html).gsub(/[[:space:]]/, ' ').split(/\.\s*/)
summary = +''

sentences.slice(0,3).each do |sentence|
break if (summary.length + sentence.length + 1) > MAX_MED_TOPIC_SUMMARY_LENGTH
Expand Down
2 changes: 1 addition & 1 deletion app/models/tweet.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class Tweet < ApplicationRecord
serialize :urls, Array

def sanitize_tweet_text
self.tweet_text = Sanitize.clean(tweet_text).squish if tweet_text
self.tweet_text = Sanitizer.sanitize(tweet_text).squish if tweet_text
end

def url_to_tweet
Expand Down
2 changes: 1 addition & 1 deletion features/support/paths.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def path_to(page_name)
when /^(.*)'s strictui search page$/
search_path(:affiliate => $1, :strictui => "1")
when /^(.*)'s search page with unsanitized "([^\"]*)" query$/
search_path(:affiliate => $1, :query => "<script>#{$2}</script>")
search_path(:affiliate => $1, :query => "<b>#{$2}</b><script>script</script>")
when /^(.*)'s search page with site limited to "([^\"]*)"$/
search_path(:affiliate => $1, :sitelimit => $2)
when /^(.*)'s image search page$/
Expand Down
10 changes: 5 additions & 5 deletions lib/api/search_options.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
require 'sanitize'
# frozen_string_literal: true

class Api::SearchOptions
include ActiveModel::Validations

LIMIT_ERROR_MESSAGE_TEMPLATE = 'must be between %s and %s'.freeze
LIMIT_ERROR_MESSAGE_TEMPLATE = 'must be between %s and %s'
class_attribute :default_limit,
:limit_range

Expand All @@ -12,7 +12,7 @@ class Api::SearchOptions

OFFSET_RANGE = (0..1000).freeze
DEFAULT_OFFSET = 0
OFFSET_ERROR_MESSAGE = "must be between #{OFFSET_RANGE.first} and #{OFFSET_RANGE.last}".freeze
OFFSET_ERROR_MESSAGE = "must be between #{OFFSET_RANGE.first} and #{OFFSET_RANGE.last}"
QUERY_PARAMS = %i(query query_not query_or query_quote)

attr_accessor :access_key,
Expand All @@ -29,7 +29,7 @@ class Api::SearchOptions
:filter

validates_presence_of :access_key,
:affiliate ,
:affiliate,
message: 'must be present'

validates_length_of :query,
Expand Down Expand Up @@ -68,7 +68,7 @@ def initialize(params = {})
self.filter = params[:filter]

QUERY_PARAMS.each do |param|
self.send("#{param}=", QuerySanitizer.sanitize(params[param]))
self.send("#{param}=", Sanitizer.sanitize(params[param], encode: false))
end
end

Expand Down
4 changes: 3 additions & 1 deletion lib/attribute_processor.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

module AttributeProcessor
def self.prepend_attributes_with_http(record, *url_attribute_names)
url_attribute_names.each do |attr_name|
Expand All @@ -9,7 +11,7 @@ def self.prepend_attributes_with_http(record, *url_attribute_names)
def self.sanitize_attributes(record, *attribute_names)
attribute_names.each do |attr_name|
value = record.send :"#{attr_name}"
record.send :"#{attr_name}=", Sanitize.clean(value)
record.send :"#{attr_name}=", Sanitizer.sanitize(value)
end
end

Expand Down
10 changes: 6 additions & 4 deletions lib/importers/rss_feed_data.rb
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# frozen_string_literal: true

class RssFeedData
include RssFeedParser

Expand Down Expand Up @@ -166,11 +168,11 @@ def link_status_code_404?(link)
def extract_body(item)
body = nil
raw_body = extract_element_content item, elements[:body]
body = Sanitize.clean(raw_body) if raw_body
body = Sanitizer.sanitize(raw_body) if raw_body

if body.blank? and @has_media_ns
media_body = extract_element(item, :media_text)
body = Sanitize.clean(media_body) if media_body
body = Sanitizer.sanitize(media_body) if media_body
end
body
end
Expand All @@ -180,11 +182,11 @@ def extract_description(item)
elements[:description].each do |description_path|
break if (raw_description = extract_element_content item, description_path)
end
description = Sanitize.clean(raw_description) if raw_description
description = Sanitizer.sanitize(raw_description) if raw_description

if description.blank? and @has_media_ns
media_description = extract_element(item, :media_description)
description = Sanitize.clean(media_description) if media_description
description = Sanitizer.sanitize(media_description) if media_description
end
description
end
Expand Down
7 changes: 0 additions & 7 deletions lib/query_sanitizer.rb

This file was deleted.

10 changes: 10 additions & 0 deletions lib/sanitizer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# frozen_string_literal: true

module Sanitizer
def self.sanitize(str, encode: true)
Loofah.fragment(str).scrub!(:prune).text(encode_special_chars: encode).squish
rescue ArgumentError => e
Rails.logger.error("Error sanitizing string #{str}: #{e}")
nil
end
end
10 changes: 5 additions & 5 deletions spec/controllers/image_searches_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
describe '#index' do
context 'when searching on legacy affiliate and the query is present' do
let(:affiliate) { affiliates(:basic_affiliate) }
let(:query) { '<script>thunder & lightning</script>' }
let(:query) { '<b>thunder & lightning</b>' }
let(:image_search) do
double(LegacyImageSearch,
query: 'thunder & lightning',
Expand All @@ -27,7 +27,7 @@
context 'for a live search' do
before do
get :index, params: { affiliate: 'nps.gov',
query: '<script>thunder & lightning</script>' }
query: query }
end

it { is_expected.to assign_to(:search).with(image_search) }
Expand All @@ -51,7 +51,7 @@
context 'for a staged search' do
before do
get :index, params: { affiliate: 'nps.gov',
query: '<script>thunder & lightning</script>',
query: query,
staged: 'true' }
end

Expand All @@ -67,7 +67,7 @@
expect(image_search).to receive(:to_json).and_return(search_results_json)
get :index,
params: { affiliate: 'nps.gov',
query: '<script>thunder & lightning</script>' },
query: query },
format: :json
end

Expand Down Expand Up @@ -113,7 +113,7 @@
context 'when searching normally' do
before do
get :index,
params: { query: '<script>weather</script>',
params: { query: '<b>weather</b>',
affiliate: 'usagov' },
format: 'json'
@search = assigns[:search]
Expand Down
2 changes: 1 addition & 1 deletion spec/controllers/searches_controller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@
get :index,
params: {
affiliate: affiliate.name,
query: '<script>thunder & lightning</script>'
query: '<b>thunder & lightning</b>'
}
@search = assigns[:search]
@page_title = assigns[:page_title]
Expand Down
Loading

0 comments on commit b9d85f1

Please sign in to comment.