-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #326 from sul-dlss/exhibits-prod-alt
Add new exhibits schema with a string-type id field
- Loading branch information
Showing
8 changed files
with
1,842 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
{ | ||
"initArgs":{}, | ||
"managedList":[]} |
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# Punctuation characters we want to ignore as query terms (i.e., when they are | ||
# surrounded by whitespace in a query, like 'fred : the puppy') | ||
# ONLY FOR SINGLE TOKEN ANALYZED FIELDS | ||
# see https://issues.apache.org/jira/browse/SOLR-3085 | ||
# Note that hyphens, plusses, and double hyphens are not treated as terms | ||
# per debugQuery | ||
: | ||
; | ||
& | ||
/ | ||
= | ||
> | ||
< | ||
, | ||
. | ||
( | ||
) | ||
… | ||
» | ||
§ | ||
• | ||
· |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
# http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory | ||
# A synonym file for Solr SynonymFilterFactory. | ||
# Needs to be included at both index and query time | ||
# AFTER the case folding | ||
# BEFORE the WordDelimiterFilterFactory that removes punctuation | ||
# e.g. | ||
# <analyzer> | ||
# <tokenizer class="solr.WhitespaceTokenizerFactory" /> | ||
# <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed --> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.WordDelimiterFilterFactory" ... | ||
# ... | ||
# | ||
# !!! | ||
# !!! IMPORTANT: see also synonyms_both_anchors.txt, synonyms_left_anchor.txt and synonyms_right_anchor.txt | ||
# !!! | ||
|
||
# Explicit mappings match any token sequence on the LHS of "=>" | ||
# and replace with all alternatives on the RHS. These types of mappings | ||
# ignore the expand parameter in the schema. | ||
# Equivalent synonyms may be separated with commas and give | ||
# no explicit mapping. In this case the mapping behavior will | ||
# be taken from the expand parameter in the schema. | ||
# If expand==true, "ipod, i-pod, i pod" is equivalent to the explicit mapping: | ||
# ipod, i-pod, i pod => ipod, i-pod, i pod | ||
# If expand==false, "ipod, i-pod, i pod" is equivalent to the explicit mapping: | ||
# ipod, i-pod, i pod => ipod | ||
# set expand to true for index time and false for query time | ||
|
||
# See SW-845 | ||
# "Dept." will change to "Department" | ||
# "Koran" will change to "Qur'an" | ||
# "violoncello" will change to "cello" | ||
# "O.T." and "N.T." will change to "Old Testament" and "New Testament" | ||
# The mappings below are explicit ("=>"), so every form on the LHS is replaced | ||
# by the single RHS token; e.g. "department" is rewritten to "dept", not the | ||
# reverse. | ||
# Note that mapping TO the abbreviation improves recall but reduces precision: | ||
# O.T. can mean Old Testament or overtime; "dept" could be a word in some | ||
# language. | ||
department => dept | ||
qurʼan, qur'an, quran, qorʼan, qor'an, qoran => koran | ||
violoncello, violincello => cello | ||
# multi-token synonyms, and synonyms with punctuation, can be problematic | ||
#old testament => o.t. | ||
#new testament => n.t. | ||
|
||
# The below is inspired by Jonathan Rochkind at Johns Hopkins University, 2013-04-15 | ||
|
||
# Terms that include punctuation which we want to protect and keep searchable. | ||
# We do this by mapping them to unique tokens that do not include punctuation. | ||
|
||
# computer languages | ||
# these are explicit mappings so when WDF drops the non-letter chars, c++ is not equivalent to c | ||
c++ => cplusplus | ||
j#, j♯ => jsssharp | ||
# c# and f# are musical keys as well as computer languages, so they are mapped in the "musical keys" section below | ||
|
||
# musical keys | ||
# these are explicit mappings so when WDF drops the non-letter chars, c# is not equivalent to c | ||
# We map from number-sign (#), musical sharp (♯), and the hyphenated form (-sharp) | ||
a#, a♯, a-sharp => a sharp | ||
b#, b♯, b-sharp => b sharp | ||
c#, c♯, c-sharp => c sharp | ||
d#, d♯, d-sharp => d sharp | ||
e#, e♯, e-sharp => e sharp | ||
f#, f♯, f-sharp => f sharp | ||
g#, g♯, g-sharp => g sharp | ||
# We map from lowercase b, musical flat (♭), and the hyphenated form (-flat) | ||
ab, a♭, a-flat => a flat | ||
bb, b♭, b-flat => b flat | ||
cb, c♭, c-flat => c flat | ||
db, d♭, d-flat => d flat | ||
eb, e♭, e-flat => e flat | ||
fb, f♭, f-flat => f flat | ||
gb, g♭, g-flat => g flat |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Include in analysis with both left anchor of 'aaaaaa' and right anchor of 'zzzzzz' | ||
# for query or field comprised solely of token meant to be a synonym | ||
# | ||
# http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory | ||
# A synonym file for Solr SynonymFilterFactory. | ||
# Needs to be included at both index and query time | ||
# AFTER the case folding | ||
# BEFORE the WordDelimiterFilterFactory that removes punctuation | ||
# e.g. | ||
# <analyzer> | ||
# <!-- put beginning and ending anchors on field value, removing trailing chars --> | ||
# <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(.*[^\s\.\:\;\/\[\]])[\s\.\:\;\/\[\]]*$" replacement="aaaaaa$1zzzzzz"/> | ||
# <tokenizer class="solr.WhitespaceTokenizerFactory" /> | ||
# <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed --> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms_both_anchors.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms_right_anchor.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.WordDelimiterFilterFactory" ... | ||
# ... | ||
# | ||
|
||
aaaaaadepartmentzzzzzz => aaaaaadeptzzzzzz | ||
aaaaaaqurʼanzzzzzz, aaaaaaqur'anzzzzzz, aaaaaaquranzzzzzz, aaaaaaqorʼanzzzzzz, aaaaaaqor'anzzzzzz, aaaaaaqoranzzzzzz => aaaaaakoranzzzzzz | ||
aaaaaavioloncellozzzzzz, aaaaaaviolincellozzzzzz => aaaaaacellozzzzzz | ||
|
||
# computer languages | ||
aaaaaac++zzzzzz => aaaaaacpluspluszzzzzz | ||
aaaaaaj#zzzzzz, aaaaaaj♯zzzzzz => aaaaaajsssharpzzzzzz | ||
|
||
# musical keys | ||
# We map from number-sign (#), musical sharp (♯), and the hyphenated form (-sharp) | ||
aaaaaaa#zzzzzz, aaaaaaa♯zzzzzz, aaaaaaa-sharpzzzzzz => aaaaaaa sharpzzzzzz | ||
aaaaaab#zzzzzz, aaaaaab♯zzzzzz, aaaaaab-sharpzzzzzz => aaaaaab sharpzzzzzz | ||
aaaaaac#zzzzzz, aaaaaac♯zzzzzz, aaaaaac-sharpzzzzzz => aaaaaac sharpzzzzzz | ||
aaaaaad#zzzzzz, aaaaaad♯zzzzzz, aaaaaad-sharpzzzzzz => aaaaaad sharpzzzzzz | ||
aaaaaae#zzzzzz, aaaaaae♯zzzzzz, aaaaaae-sharpzzzzzz => aaaaaae sharpzzzzzz | ||
aaaaaaf#zzzzzz, aaaaaaf♯zzzzzz, aaaaaaf-sharpzzzzzz => aaaaaaf sharpzzzzzz | ||
aaaaaag#zzzzzz, aaaaaag♯zzzzzz, aaaaaag-sharpzzzzzz => aaaaaag sharpzzzzzz | ||
# We map from lowercase b, musical flat (♭), and the hyphenated form (-flat) | ||
aaaaaaabzzzzzz, aaaaaaa♭zzzzzz, aaaaaaa-flatzzzzzz => aaaaaaa flatzzzzzz | ||
aaaaaabbzzzzzz, aaaaaab♭zzzzzz, aaaaaab-flatzzzzzz => aaaaaab flatzzzzzz | ||
aaaaaacbzzzzzz, aaaaaac♭zzzzzz, aaaaaac-flatzzzzzz => aaaaaac flatzzzzzz | ||
aaaaaadbzzzzzz, aaaaaad♭zzzzzz, aaaaaad-flatzzzzzz => aaaaaad flatzzzzzz | ||
aaaaaaebzzzzzz, aaaaaae♭zzzzzz, aaaaaae-flatzzzzzz => aaaaaae flatzzzzzz | ||
aaaaaafbzzzzzz, aaaaaaf♭zzzzzz, aaaaaaf-flatzzzzzz => aaaaaaf flatzzzzzz | ||
aaaaaagbzzzzzz, aaaaaag♭zzzzzz, aaaaaag-flatzzzzzz => aaaaaag flatzzzzzz |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
# Include in analysis with left anchor of 'aaaaaa' | ||
# for query or field beginning with token meant to be a synonym | ||
# | ||
# http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory | ||
# A synonym file for Solr SynonymFilterFactory. | ||
# Needs to be included at both index and query time | ||
# AFTER the case folding | ||
# BEFORE the WordDelimiterFilterFactory that removes punctuation | ||
# e.g. | ||
# <analyzer> | ||
# <!-- put beginning anchor on field value, assume first non-whitespace char is unicode letter or number or symbol --> | ||
# <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*([\p{L}\p{N}\p{S}]{1})" replacement="aaaaaa$1"/> | ||
# <tokenizer class="solr.WhitespaceTokenizerFactory" /> | ||
# <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed --> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.WordDelimiterFilterFactory" ... | ||
# ... | ||
# | ||
|
||
aaaaaadepartment => aaaaaadept | ||
aaaaaaqurʼan, aaaaaaqur'an, aaaaaaquran, aaaaaaqorʼan, aaaaaaqor'an, aaaaaaqoran => aaaaaakoran | ||
aaaaaavioloncello, aaaaaaviolincello => aaaaaacello | ||
|
||
# computer languages | ||
aaaaaac++ => aaaaaacplusplus | ||
aaaaaaj#, aaaaaaj♯ => aaaaaajsssharp | ||
|
||
# musical keys | ||
# We map from number-sign (#), musical sharp (♯), and the hyphenated form (-sharp) | ||
aaaaaaa#, aaaaaaa♯, aaaaaaa-sharp => aaaaaaa sharp | ||
aaaaaab#, aaaaaab♯, aaaaaab-sharp => aaaaaab sharp | ||
aaaaaac#, aaaaaac♯, aaaaaac-sharp => aaaaaac sharp | ||
aaaaaad#, aaaaaad♯, aaaaaad-sharp => aaaaaad sharp | ||
aaaaaae#, aaaaaae♯, aaaaaae-sharp => aaaaaae sharp | ||
aaaaaaf#, aaaaaaf♯, aaaaaaf-sharp => aaaaaaf sharp | ||
aaaaaag#, aaaaaag♯, aaaaaag-sharp => aaaaaag sharp | ||
# We map from lowercase b, musical flat (♭), and the hyphenated form (-flat) | ||
aaaaaaab, aaaaaaa♭, aaaaaaa-flat => aaaaaaa flat | ||
aaaaaabb, aaaaaab♭, aaaaaab-flat => aaaaaab flat | ||
aaaaaacb, aaaaaac♭, aaaaaac-flat => aaaaaac flat | ||
aaaaaadb, aaaaaad♭, aaaaaad-flat => aaaaaad flat | ||
aaaaaaeb, aaaaaae♭, aaaaaae-flat => aaaaaae flat | ||
aaaaaafb, aaaaaaf♭, aaaaaaf-flat => aaaaaaf flat | ||
aaaaaagb, aaaaaag♭, aaaaaag-flat => aaaaaag flat |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Include in analysis with right anchor of 'zzzzzz' | ||
# for query or field ending with token meant to be a synonym | ||
# | ||
# http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters#solr.SynonymFilterFactory | ||
# A synonym file for Solr SynonymFilterFactory. | ||
# Needs to be included at both index and query time | ||
# AFTER the case folding | ||
# BEFORE the WordDelimiterFilterFactory that removes punctuation | ||
# e.g. | ||
# <analyzer> | ||
# <!-- put beginning and ending anchors on field value, removing trailing chars --> | ||
# <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="^\s*(.*[^\s\.\:\;\/\[\]])[\s\.\:\;\/\[\]]*$" replacement="aaaaaa$1zzzzzz"/> | ||
# <tokenizer class="solr.WhitespaceTokenizerFactory" /> | ||
# <filter class="solr.ICUFoldingFilterFactory"/> <!-- NFKC, case folding, diacritics removed --> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms_both_anchors.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms_left_anchor.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.SynonymFilterFactory" synonyms="synonyms_right_anchor.txt" ignoreCase="true" expand="false"/> | ||
# <filter class="solr.WordDelimiterFilterFactory" ... | ||
# ... | ||
# | ||
|
||
departmentzzzzzz => deptzzzzzz | ||
qurʼanzzzzzz, qur'anzzzzzz, quranzzzzzz, qorʼanzzzzzz, qor'anzzzzzz, qoranzzzzzz => koranzzzzzz | ||
violoncellozzzzzz, violincellozzzzzz => cellozzzzzz | ||
|
||
# computer languages | ||
c++zzzzzz => cpluspluszzzzzz | ||
j#zzzzzz, j♯zzzzzz => jsssharpzzzzzz | ||
|
||
# musical keys | ||
# We map from number-sign (#), musical sharp (♯), and the hyphenated form (-sharp) | ||
a#zzzzzz, a♯zzzzzz, a-sharpzzzzzz => a sharpzzzzzz | ||
b#zzzzzz, b♯zzzzzz, b-sharpzzzzzz => b sharpzzzzzz | ||
c#zzzzzz, c♯zzzzzz, c-sharpzzzzzz => c sharpzzzzzz | ||
d#zzzzzz, d♯zzzzzz, d-sharpzzzzzz => d sharpzzzzzz | ||
e#zzzzzz, e♯zzzzzz, e-sharpzzzzzz => e sharpzzzzzz | ||
f#zzzzzz, f♯zzzzzz, f-sharpzzzzzz => f sharpzzzzzz | ||
g#zzzzzz, g♯zzzzzz, g-sharpzzzzzz => g sharpzzzzzz | ||
# We map from lowercase b, musical flat (♭), and the hyphenated form (-flat) | ||
abzzzzzz, a♭zzzzzz, a-flatzzzzzz => a flatzzzzzz | ||
bbzzzzzz, b♭zzzzzz, b-flatzzzzzz => b flatzzzzzz | ||
cbzzzzzz, c♭zzzzzz, c-flatzzzzzz => c flatzzzzzz | ||
dbzzzzzz, d♭zzzzzz, d-flatzzzzzz => d flatzzzzzz | ||
ebzzzzzz, e♭zzzzzz, e-flatzzzzzz => e flatzzzzzz | ||
fbzzzzzz, f♭zzzzzz, f-flatzzzzzz => f flatzzzzzz | ||
gbzzzzzz, g♭zzzzzz, g-flatzzzzzz => g flatzzzzzz |