Skip to content

Commit

Permalink
Merge pull request #259 from scientist-softserv/ocr-count-correction
Browse files Browse the repository at this point in the history
🐛 Correct search count
  • Loading branch information
kirkkwang authored Jul 13, 2023
2 parents 4a3caaf + 660d98b commit b61644c
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 5 deletions.
27 changes: 24 additions & 3 deletions app/models/iiif_print/iiif_search_response_decorator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,37 @@ module IiifSearchResponseDecorator
# @see https://github.com/scientist-softserv/louisville-hyku/commit/67467e5cf9fdb755f54419f17d3c24c87032d0af
def annotation_list
# Start from the annotation list built by the parent implementation.
json_results = super
# NOTE(review): the next two statements (`resources = ...` and `resources&.each do`)
# look like removed-side diff lines shown next to their replacement; the `do` has
# no matching `end` inside this method as listed. Confirm against the committed file.
resources = json_results&.[]('resources')

resources&.each do |result_hit|
# Check and process invalid hit
# Post-processing only runs when the response has a truthy 'resources' entry.
if json_results&.[]('resources')
remove_invalid_hit(json_results)
add_metadata_match(json_results)
end

# Hand back the (possibly modified) response.
json_results
end

##
# Drops the placeholder annotation that marks an invalid match.
#
# Looks for the first entry of json_results['resources'] whose "on" value includes
# AnnotationDecorator::INVALID_MATCH_TEXT. When one is found it is deleted from the
# resources, the first entry of json_results['hits'] is discarded and
# json_results['within']['total'] is reduced by one. Otherwise nothing is touched.
# @param json_results [#[]] the search response, modified in place
# @return [Integer, nil] the decremented total, or nil when there was nothing to remove
def remove_invalid_hit(json_results)
  annotations = json_results['resources']
  placeholder = annotations.find do |annotation|
    annotation["on"].include?(IiifPrint::BlacklightIiifSearch::AnnotationDecorator::INVALID_MATCH_TEXT)
  end
  return if placeholder.nil?

  annotations.delete(placeholder)
  # The first hit is taken to be the one produced by the placeholder annotation.
  json_results['hits'].shift
  json_results['within']['total'] -= 1
end

##
# Gives every entry of json_results['resources'] that has no 'resource' value a
# placeholder text resource saying the match came from metadata.
# Entries whose 'resource' is already present are left untouched.
# @param json_results [#[]] the search response, modified in place
# @return the same json_results object
def add_metadata_match(json_results)
  json_results['resources'].each do |annotation|
    unless annotation['resource'].present?
      # Add resource details if not present
      annotation['resource'] = {
        "@type": "cnt:ContentAsText",
        "chars": "Metadata match, see sidebar for details"
      }
    end
  end

  json_results
end
end
end
38 changes: 36 additions & 2 deletions lib/iiif_print/blacklight_iiif_search/annotation_decorator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
module IiifPrint
module BlacklightIiifSearch
module AnnotationDecorator
INVALID_MATCH_TEXT = "#xywh=INVALID,INVALID,INVALID,INVALID".freeze
##
# Create a URL for the annotation
# use a Hyrax-y URL syntax:
Expand All @@ -28,19 +29,28 @@ def canvas_uri_for_annotation
# @return [String]
def coordinates
# No query means nothing to locate; use the default coordinates.
return default_coords if query.blank?

# NOTE(review): this listing appears to interleave removed and added diff lines
# (two assignments to sanitized_query and two guards on coords_json['coords']).
# As listed, the first guard returns before the second can ever fire. Confirm
# against the committed file which pair is current.
sanitized_query = sanitize_query
coords_json = fetch_and_parse_coords
return default_coords unless coords_json && coords_json['coords']
sanitized_query = query.match(additional_query_terms_regex)[1].strip
return derived_coords_json_and_properties(sanitized_query) unless coords_json && coords_json['coords']

# Individual search terms, lower-cased for comparison with the lower-cased keys below.
query_terms = sanitized_query.split(' ').map(&:downcase)

# Keep the 'coords' entries whose key matches any of the query terms.
# NOTE(review): the terms are interpolated into the pattern unescaped, so a term
# containing regex metacharacters (e.g. "(" or "*") can raise RegexpError or
# match more than intended; Regexp.union(query_terms) would avoid that.
matches = coords_json['coords'].select do |k, _v|
k.downcase =~ /(#{query_terms.join('|')})/
end
return default_coords if matches.blank?

# Flatten the matched values one level and pick the entry at hl_index.
# presumably each entry is an [x, y, w, h] array; verify against the coords JSON.
coords_array = matches.values.flatten(1)[hl_index]
return default_coords unless coords_array

# Render the selected entry as a "#xywh=" fragment.
"#xywh=#{coords_array.join(',')}"
end

##
# Extracts the search terms from `query` by dropping the trailing
# " AND <clause>" recognised by #additional_query_terms_regex.
#
# When the query carries no such trailing clause (or is nil) the whole query is
# used, rather than raising NoMethodError on the nil match result.
# @return [String] the search terms with surrounding whitespace removed
def sanitize_query
  raw_query = query.to_s
  match = raw_query.match(additional_query_terms_regex)
  (match ? match[1] : raw_query).strip
end

##
# return the JSON word-coordinates file contents
# @return [JSON]
Expand All @@ -54,6 +64,23 @@ def fetch_and_parse_coords
end
end

# Used when there are no word coordinates: decides what to return by checking
# whether the query text appears in any "_tesim" / "_tsim" field of @document.
# - a field contains the query: return the default coords
# - no field contains it: return INVALID_MATCH_TEXT, a marker that is meant to be
#   stripped out at a later point
# @see IiifPrint::IiifSearchResponseDecorator#annotation_list
# @param sanitized_query [String]
# @return [String]
def derived_coords_json_and_properties(sanitized_query)
  metadata_match = @document.keys.any? do |field|
    field.end_with?("_tesim", "_tsim") && property_includes_sanitized_query?(field, sanitized_query)
  end
  return default_coords if metadata_match

  INVALID_MATCH_TEXT
end

##
# Case-insensitive check of whether a field of @document contains the query text.
# Both sides are lower-cased; previously only the field text was, so any query
# containing an uppercase letter could never match.
# @param property [String] key into @document
# @param sanitized_query [String] the text to look for
# @return [Boolean]
def property_includes_sanitized_query?(property, sanitized_query)
  # Array() tolerates a single (non-array) value or a missing field.
  field_text = Array(@document[property]).join.downcase
  field_text.include?(sanitized_query.downcase)
end

##
# a default set of coordinates
# @return [String]
Expand Down Expand Up @@ -97,6 +124,13 @@ def file_set_id
##
# Pattern whose first capture group is everything before a trailing
# " AND <clause>", where <clause> is either a parenthesised expression or a
# single word at the end of a line.
# @return [Regexp]
def additional_query_terms_regex
  %r{(.*)(?= AND (\(.+\)|\w+)$)}
end

##
# Builds the text resource for the annotation; its 'chars' value is the
# result of #sanitize_query.
# @return [IIIF::Presentation::Resource]
def text_resource_for_annotation
  IIIF::Presentation::Resource.new(
    '@type' => 'cnt:ContentAsText',
    'chars' => sanitize_query
  )
end
end
end
end

0 comments on commit b61644c

Please sign in to comment.