Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync relevancy changes from gryphon-search to -prod. #350

Merged
merged 1 commit into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions searchworks-prod-20230907/schema.xml
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@

<!-- Title Search Fields -->
<!-- title_245a_*: the exact, ws and unstem variants are index-only (stored="false");
     each receives a copy of title_245a_search through a copyField declared later in this schema. -->
<field name="title_245a_exact_search" type="text_anchored" indexed="true" stored="false" />
<field name="title_245a_ws_search" type="text_ws" indexed="true" stored="false" />
<field name="title_245a_search" type="text" indexed="true" stored="true" />
<field name="vern_title_245a_search" type="textNoStem" indexed="true" stored="true" />
<field name="title_245a_unstem_search" type="textNoStem" indexed="true" stored="false" />
<!-- title_245_*: the ws and unstem variants are index-only (stored="false");
     each receives a copy of title_245_search through a copyField declared later in this schema. -->
<field name="title_245_ws_search" type="text_ws" indexed="true" stored="false" />
<field name="title_245_search" type="text" indexed="true" stored="true" />
<field name="vern_title_245_search" type="textNoStem" indexed="true" stored="true" />
<field name="title_245_unstem_search" type="textNoStem" indexed="true" stored="false" />
Expand Down Expand Up @@ -89,6 +91,7 @@
<field name="author_title_search" type="text" indexed="true" stored="true" multiValued="true" />
<field name="author_title_unstem_search" type="textNoStem" indexed="true" stored="false" multiValued="true" />
<field name="best_author_title_search" type="text" indexed="true" stored="true" multiValued="false" />
<field name="best_author_title_unstem_search" type="textNoStem" indexed="true" stored="false" multiValued="false" />

<!-- Author Search Fields -->
<field name="author_1xx_search" type="text" indexed="true" stored="true" />
Expand Down Expand Up @@ -300,14 +303,17 @@
<copyField source="barcode_search" dest="item_barcodes" />
<!-- unstemmed, whitespace-tokenized and anchored search fields: title -->
<copyField source="title_245a_search" dest="title_245a_exact_search" />
<copyField source="title_245a_search" dest="title_245a_ws_search" />
<copyField source="title_245a_search" dest="title_245a_unstem_search" />
<copyField source="title_245_search" dest="title_245_unstem_search" />
<copyField source="title_245_search" dest="title_245_ws_search" />
<copyField source="title_uniform_search" dest="title_uniform_unstem_search" />
<copyField source="title_variant_search" dest="title_variant_unstem_search" />
<copyField source="title_related_search" dest="title_related_unstem_search" />
<copyField source="author_title_search" dest="author_title_unstem_search" />
<copyField source="author_title_245ac_search" dest="author_title_245ac_unstem_search" />
<copyField source="author_title_1xx_search" dest="author_title_1xx_unstem_search" />
<copyField source="best_author_title_search" dest="best_author_title_unstem_search" />
<!-- unstemmed search fields: author -->
<copyField source="author_1xx_search" dest="author_1xx_unstem_search" />
<copyField source="author_7xx_search" dest="author_7xx_unstem_search" />
Expand Down Expand Up @@ -495,6 +501,24 @@
</analyzer>
</fieldtype>

<!-- Analyzed Text, whitespace-tokenized (no stemming): tokens are split on whitespace only,
     after char filters remove punctuation that borders whitespace, then passed through
     ICUNormalizer2FilterFactory with its default settings.
     Punctuation is only removed when it is adjacent to whitespace; punctuation inside a
     token, or at the very start or end of the input, is left in place. -->
<fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true">
<analyzer type="index">
<!-- a run of punctuation standing alone between whitespace collapses to one space -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
<!-- whitespace followed by a single opening quote, bracket, brace or paren becomes one space -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+([&quot;\[{(])" replacement=" " />
<!-- a trailing run of ; : . , quote or closing bracket/brace/paren followed by whitespace becomes one space -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([;:.,&quot;\]})]+)\s+" replacement=" " />
<tokenizer class="solr.WhitespaceTokenizerFactory" />
<filter class="solr.ICUNormalizer2FilterFactory" />
</analyzer>
<analyzer type="query">
<!-- a run of punctuation standing alone between whitespace collapses to one space -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(\p{Punct}+)\s+" replacement=" " />
<!-- at query time only a double quote is stripped as opening punctuation.
     NOTE(review): brackets, braces and parens are presumably left alone because they can
     carry query-syntax meaning; confirm. -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\s+(&quot;)" replacement=" " />
<!-- a trailing run of ; : . , or quote followed by whitespace becomes one space.
     The earlier pattern demanded a literal closing brace after the punctuation run, so
     ordinary trailing punctuation (e.g. the comma in "Hamlet, prince") stayed attached to
     the query token and could never match the index-side token, which has it stripped. -->
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="([;:.,&quot;]+)\s+" replacement=" " />
<tokenizer class="solr.WhitespaceTokenizerFactory" />
<filter class="solr.ICUNormalizer2FilterFactory" />
</analyzer>
</fieldtype>

<!-- Left and Right Anchored Analyzed Text, no Stemming -->
<fieldtype name="text_anchored" class="solr.TextField" positionIncrementGap="10000" autoGeneratePhraseQueries="true">
<analyzer type="index">
Expand Down
70 changes: 33 additions & 37 deletions searchworks-prod-20230907/solrconfig.xml
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,8 @@
See the License for the specific language governing permissions and
limitations under the License.
-->

<!--
This is a stripped down config file used for a simple example...
It is *not* a good example to work from.
-->
<config>
<luceneMatchVersion>8.0.0</luceneMatchVersion>
<luceneMatchVersion>8.11.2</luceneMatchVersion>
<!-- The DirectoryFactory to use for indexes.
solr.StandardDirectoryFactory, the default, is filesystem based.
solr.RAMDirectoryFactory is memory based, not persistent, and doesn't work with replication. -->
Expand Down Expand Up @@ -258,10 +253,16 @@
<!-- in case lucene query parser -->
<str name="df">all_search</str>
<str name="q.op">AND</str>
<str name="sow">true</str>
<str name="sow">false</str>

<!-- bump up results with a single-term matching in the 245a -->
<str name="bq">{!dismax sow=true qf=title_245a_exact_search^500 pf='' pf2='' pf3='' bf='' bq='' v=$q mm=1}</str>

<str name="qf">
title_245a_exact_search^1700
title_245a_ws_search^700
title_245a_unstem_search^500
title_245_ws_search^85
title_245_unstem_search^75
title_245_search^50 vern_title_245_search^50
series_search^10
Expand All @@ -276,6 +277,9 @@
topic_search^20
db_az_subject_search^5

bib_unstem_search^20
bib_search^10 vern_bib_search^5

pub_date_search^2
isbn_search^1.6
issn_search^1.6
Expand All @@ -290,8 +294,10 @@

<str name="qf_single_term">
title_245a_exact_search^1000
title_245a_ws_search^700
title_245a_unstem_search^500
title_245a_search^75 vern_title_245a_search^75
title_245_ws_search^85
title_245_unstem_search^75
title_245_search^50 vern_title_245_search^50
title_uniform_unstem_search^50
Expand All @@ -305,8 +311,10 @@
series_search^2 vern_series_search^2

<!-- give some boosts to push them on par with titles + phrase boosts -->
best_author_title_unstem_search^500
author_title_1xx_unstem_search^250
author_title_245ac_unstem_search^100
best_author_title_search^75
author_title_1xx_search^50
author_title_245ac_search^30
author_title_unstem_search^75
Expand Down Expand Up @@ -359,8 +367,10 @@
</str>
<str name="pf"> <!-- (phrase boost within result set) -->
title_245a_exact_search^5000
title_245a_ws_search^3000
title_245a_unstem_search^2500
title_245a_search^375 vern_title_245a_search^375
title_245_ws_search^425
title_245_unstem_search^375
title_245_search^250 vern_title_245_search^250
title_uniform_unstem_search^250
Expand All @@ -369,12 +379,14 @@
title_variant_search^75 vern_title_variant_search^75
title_related_unstem_search^75
title_related_search^50 vern_title_related_search^50
series_exact_search^50
series_unstem_search^25
series_exact_search^300
series_unstem_search^275
series_search^10 vern_series_search^10

best_author_title_unstem_search^2000
author_title_1xx_unstem_search^1000
author_title_245ac_unstem_search^500
best_author_title_search^375
author_title_1xx_search^300
author_title_245ac_search^270
author_title_unstem_search^250
Expand Down Expand Up @@ -421,7 +433,11 @@
all_search^5 vern_all_search^5
</str>
<str name="pf3"> <!-- (token trigrams boost within result set) -->
title_245a_search^2200 vern_title_245a_search^2200
title_245a_ws_search^2700
title_245a_unstem_search^2500
title_245_ws_search^1500
title_245_unstem_search^1200
title_245a_search^1400 vern_title_245a_search^1400
title_245_search^225 vern_title_245_search^225
title_uniform_search^150 vern_title_uniform_search^150
title_variant_search^60 vern_title_variant_search^60
Expand Down Expand Up @@ -459,7 +475,11 @@
all_search^3 vern_all_search^3
</str>
<str name="pf2"> <!--(token bigrams boost within result set) -->
title_245a_search^1700 vern_title_245a_search^1700
title_245a_ws_search^2000
title_245a_unstem_search^1700
title_245_ws_search^1250
title_245_unstem_search^1000
title_245a_search^500 vern_title_245a_search^500
title_245_search^150 vern_title_245_search^150
title_uniform_search^100 vern_title_uniform_search^100
title_variant_search^40 vern_title_variant_search^40
Expand Down Expand Up @@ -683,8 +703,10 @@

<str name="qf_title">
title_245a_exact_search^380
title_245a_ws_search^170
title_245a_unstem_search^130
title_245a_search^75 vern_title_245a_search^75
title_245_ws_search^85
title_245_unstem_search^75
title_245_search^50 vern_title_245_search^50
title_uniform_unstem_search^50
Expand Down Expand Up @@ -952,20 +974,6 @@
series_search^2 vern_series_search^2 cjk_series_search^2 ja_series_search^2
</str>


<!-- for course reserves searches -->
<str name="qf_crez">
crez_instructor_search
crez_course_name_search
crez_course_id_search
</str>
<str name="pf_crez">
crez_instructor_search^2
crez_course_name_search^2
</str>
<str name="sort_crez">score desc, title_sort asc, pub_date_sort desc</str>


<!-- for advanced search publisher text box -->
<str name="qf_pub_info">
pub_search vern_pub_search
Expand Down Expand Up @@ -1076,7 +1084,6 @@
collection,
collection_type,
collection_with_title,
crez_course_info,
courses_folio_id_ssim,
db_az_subject,
druid,
Expand All @@ -1090,7 +1097,6 @@
iiif_manifest_url_ssim,
imprint_display,
isbn_display,
item_display,
lccn,
lc_assigned_callnum_ssim,
mhld_display,
Expand Down Expand Up @@ -1127,7 +1133,6 @@
ht_htid_ssim,
ht_bib_key_ssim,
characteristics_ssim,
holdings_library_code_ssim,
holdings_json_struct:[json],
author_struct:[json],
marc_links_struct:[json],
Expand Down Expand Up @@ -1164,15 +1169,6 @@
</lst>
</requestHandler>

<!-- barcode requests; use /barcode?q=1234567890 -->
<requestHandler name="/barcode" class="solr.SearchHandler" >
<lst name="defaults">
<str name="echoParams">explicit</str>
<str name="fl">id</str>
<str name="q">{!field f=barcode_search v=$n}</str>
</lst>
</requestHandler>

<!-- used to get consecutive terms for browsing -->
<searchComponent name="termsComp" class="solr.TermsComponent"/>
<requestHandler name="/alphaTerms" class="solr.SearchHandler">
Expand Down
Loading