diff --git a/build.properties b/build.properties
index a5b6998..2bbe503 100644
--- a/build.properties
+++ b/build.properties
@@ -13,7 +13,7 @@
# limitations under the License.
name=cascading.solr
-version=2.6-SNAPSHOT
+version=2.6-SOLR6
jar.name=${ant.project.name}-${version}.jar
@@ -36,7 +36,7 @@ build.dir.test-reports=${build.dir}/test
javac.debug=on
javac.optimize=on
javac.deprecation=off
-javac.version=1.7
+javac.version=1.8
javac.args=
javac.args.warnings=-Xlint:none
build.encoding=UTF-8
diff --git a/pom.xml b/pom.xml
index 18a6791..3c75fd6 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
cascading.solrCascading Scheme for Apache Solrjar
- 2.6-SNAPSHOT
+ 2.6-SOLR6Cascading Scheme for creating Lucene indexes using Solrhttp://github.com/ScaleUnlimited/cascading.solr
@@ -34,9 +34,9 @@
- 2.5.6
- 2.2.0
- 4.10.1
+ 2.7.1
+ 2.6.0
+ 6.6.2
@@ -84,6 +84,12 @@
+
+ commons-io
+ commons-io
+ 2.5
+
+
org.slf4jslf4j-api
diff --git a/src/main/java/com/scaleunlimited/cascading/scheme/core/BinaryUpdateRequest.java b/src/main/java/com/scaleunlimited/cascading/scheme/core/BinaryUpdateRequest.java
index d0834dc..ed44223 100644
--- a/src/main/java/com/scaleunlimited/cascading/scheme/core/BinaryUpdateRequest.java
+++ b/src/main/java/com/scaleunlimited/cascading/scheme/core/BinaryUpdateRequest.java
@@ -47,9 +47,10 @@ public String getXML() throws IOException {
}
/**
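+ * @return never returns normally; this binary-protocol implementation always throws {@link IllegalStateException}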
* @since solr 1.4
*/
- public void writeXML( Writer writer ) throws IOException {
+ public UpdateRequest writeXML( Writer writer ) throws IOException {
throw new IllegalStateException("Can't write XML when using binary protocol");
}
diff --git a/src/main/java/com/scaleunlimited/cascading/scheme/core/SolrSchemeUtil.java b/src/main/java/com/scaleunlimited/cascading/scheme/core/SolrSchemeUtil.java
index 7f297b5..3428479 100644
--- a/src/main/java/com/scaleunlimited/cascading/scheme/core/SolrSchemeUtil.java
+++ b/src/main/java/com/scaleunlimited/cascading/scheme/core/SolrSchemeUtil.java
@@ -2,6 +2,7 @@
import java.io.File;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
@@ -19,52 +20,56 @@
public class SolrSchemeUtil {
- public static final String DEFAULT_DATA_DIR_PROPERTY_NAME = "solr.data.dir";
+ public static final String CORE_DIR_NAME = "core";
- public static File makeTempSolrHome(File solrCoreDir) throws IOException {
+ public static File makeTempSolrHome(File solrConfDir, File dataDir) throws IOException {
String tmpFolder = System.getProperty("java.io.tmpdir");
File tmpSolrHome = new File(tmpFolder, UUID.randomUUID().toString());
// Set up a temp location for Solr home, where we're write out a synthetic solr.xml
// that references the core directory.
- String coreName = solrCoreDir.getName();
- String corePath = solrCoreDir.getAbsolutePath();
- String solrXmlContent = String.format("",
- coreName, corePath);
File solrXmlFile = new File(tmpSolrHome, "solr.xml");
- FileUtils.write(solrXmlFile, solrXmlContent);
+ FileUtils.write(solrXmlFile, "", StandardCharsets.UTF_8);
+ File coreDir = new File(tmpSolrHome, CORE_DIR_NAME);
+ coreDir.mkdirs();
+
+ // Create the core.properties file with appropriate entries.
+ File coreProps = new File(coreDir, "core.properties");
+
+ StringBuilder props = new StringBuilder();
+ props.append("enable.special-handlers=false\n"); // All we need is the update request handler
+ props.append("enable.cache-warming=false\n"); // We certainly don't need to warm the cache
+
+ if (dataDir != null) {
+ props.append("dataDir=");
+ props.append(dataDir.getAbsolutePath());
+ props.append('\n');
+ }
+
+ FileUtils.write(coreProps, props.toString(), StandardCharsets.UTF_8);
+
+ // Copy over all of the conf/ dir files.
+ File destDir = new File(coreDir, "conf");
+ FileUtils.copyDirectory(solrConfDir, destDir);
return tmpSolrHome;
}
- public static void validate(File solrCoreDir, String dataDirPropertyName, Fields schemeFields) throws IOException {
+ public static void validate(File solrConfDir, Fields schemeFields) throws IOException {
- // Verify solrHomeDir exists
- if (!solrCoreDir.exists() || !solrCoreDir.isDirectory()) {
- throw new TapException("Solr core directory doesn't exist: " + solrCoreDir);
+ // Verify solrConfDir exists
+ if (!solrConfDir.exists() || !solrConfDir.isDirectory()) {
+ throw new TapException("Solr conf directory doesn't exist: " + solrConfDir);
}
- File tmpSolrHome = makeTempSolrHome(solrCoreDir);
-
- // Set up a temp location for Solr home, where we're write out a synthetic solr.xml
- // that references the core directory.
- String coreName = solrCoreDir.getName();
- String corePath = solrCoreDir.getAbsolutePath();
- String solrXmlContent = String.format("",
- coreName, corePath);
- File solrXmlFile = new File(tmpSolrHome, "solr.xml");
- FileUtils.write(solrXmlFile, solrXmlContent);
-
// Set up a temp location for data, so when we instantiate the coreContainer,
// we don't pollute the solr home with a /data sub-dir.
String tmpFolder = System.getProperty("java.io.tmpdir");
File tmpDataDir = new File(tmpFolder, UUID.randomUUID().toString());
tmpDataDir.mkdir();
-
- System.setProperty(dataDirPropertyName, tmpDataDir.getAbsolutePath());
- System.setProperty("enable.special-handlers", "false"); // All we need is the update request handler
- System.setProperty("enable.cache-warming", "false"); // We certainly don't need to warm the cache
-
+
+ // Create a temp Solr home dir containing a solr.xml and a core.properties file to work from.
+ File tmpSolrHome = makeTempSolrHome(solrConfDir, tmpDataDir);
CoreContainer coreContainer = new CoreContainer(tmpSolrHome.getAbsolutePath());
try {
diff --git a/src/main/java/com/scaleunlimited/cascading/scheme/core/SolrWriter.java b/src/main/java/com/scaleunlimited/cascading/scheme/core/SolrWriter.java
index 93133bb..849ae9f 100644
--- a/src/main/java/com/scaleunlimited/cascading/scheme/core/SolrWriter.java
+++ b/src/main/java/com/scaleunlimited/cascading/scheme/core/SolrWriter.java
@@ -4,18 +4,21 @@
import java.io.IOException;
import org.apache.hadoop.io.BytesWritable;
-import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.UpdateParams;
import org.apache.solr.core.CoreContainer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
public abstract class SolrWriter {
+ private static final Logger LOGGER = LoggerFactory.getLogger(SolrWriter.class);
+
// TODO KKr - make this configurable.
private static final int MAX_DOCS_PER_ADD = 500;
@@ -25,10 +28,10 @@ public abstract class SolrWriter {
private int _maxSegments;
private transient CoreContainer _coreContainer;
- private transient SolrServer _solrServer;
+ private transient EmbeddedSolrServer _solrServer;
private transient BinaryUpdateRequest _updateRequest;
- public SolrWriter(KeepAliveHook keepAlive, Fields sinkFields, String dataDirPropertyName, String dataDir, File solrCoreDir, int maxSegments) throws IOException {
+ public SolrWriter(KeepAliveHook keepAlive, Fields sinkFields, String dataDir, File solrConfDir, int maxSegments) throws IOException {
_keepAlive = keepAlive;
_sinkFields = sinkFields;
_maxSegments = maxSegments;
@@ -40,13 +43,10 @@ public SolrWriter(KeepAliveHook keepAlive, Fields sinkFields, String dataDirProp
// Fire up an embedded Solr server
try {
- System.setProperty(dataDirPropertyName, dataDir);
- System.setProperty("enable.special-handlers", "false"); // All we need is the update request handler
- System.setProperty("enable.cache-warming", "false"); // We certainly don't need to warm the cache
- File solrHome = SolrSchemeUtil.makeTempSolrHome(solrCoreDir);
+ File solrHome = SolrSchemeUtil.makeTempSolrHome(solrConfDir, new File(dataDir));
_coreContainer = new CoreContainer(solrHome.getAbsolutePath());
_coreContainer.load();
- _solrServer = new EmbeddedSolrServer(_coreContainer, solrCoreDir.getName());
+ _solrServer = new EmbeddedSolrServer(_coreContainer, SolrSchemeUtil.CORE_DIR_NAME);
} catch (Exception e) {
if (_coreContainer != null) {
_coreContainer.shutdown();
@@ -108,8 +108,8 @@ private void flushInputDocuments(boolean force) throws IOException {
_updateRequest.process(_solrServer);
if (force) {
- _solrServer.commit(true, true);
- _solrServer.optimize(true, true, _maxSegments);
+ _solrServer.commit(SolrSchemeUtil.CORE_DIR_NAME, true, true);
+ _solrServer.optimize(SolrSchemeUtil.CORE_DIR_NAME, true, true, _maxSegments);
}
} catch (SolrServerException e) {
throw new IOException(e);
diff --git a/src/main/java/com/scaleunlimited/cascading/scheme/hadoop/SolrOutputFormat.java b/src/main/java/com/scaleunlimited/cascading/scheme/hadoop/SolrOutputFormat.java
index 7a9d8d5..3fda3ff 100644
--- a/src/main/java/com/scaleunlimited/cascading/scheme/hadoop/SolrOutputFormat.java
+++ b/src/main/java/com/scaleunlimited/cascading/scheme/hadoop/SolrOutputFormat.java
@@ -27,10 +27,9 @@
public class SolrOutputFormat extends FileOutputFormat {
private static final Logger LOGGER = LoggerFactory.getLogger(SolrOutputFormat.class);
- public static final String SOLR_CORE_PATH_KEY = "com.scaleunlimited.cascading.solr.corePath";
+ public static final String SOLR_CONF_PATH_KEY = "com.scaleunlimited.cascading.solr.confPath";
public static final String SINK_FIELDS_KEY = "com.scaleunlimited.cascading.solr.sinkFields";
public static final String MAX_SEGMENTS_KEY = "com.scaleunlimited.cascading.solr.maxSegments";
- public static final String DATA_DIR_PROPERTY_NAME_KEY = "com.scaleunlimited.cascading.solr.dataDirPropertyName";
public static final int DEFAULT_MAX_SEGMENTS = 10;
@@ -45,13 +44,13 @@ private static class SolrRecordWriter implements RecordWriter {
public SolrRecordWriter(JobConf conf, String name, Progressable progress) throws IOException {
- // Copy Solr core directory from HDFS to temp local location.
- Path sourcePath = new Path(conf.get(SOLR_CORE_PATH_KEY));
- String coreName = sourcePath.getName();
+ // Copy Solr conf directory from HDFS to temp local location.
+ Path sourcePath = new Path(conf.get(SOLR_CONF_PATH_KEY));
+ String confName = sourcePath.getName();
String tmpDir = System.getProperty("java.io.tmpdir");
- File localSolrCore = new File(tmpDir, "cascading.solr-" + UUID.randomUUID() + "/" + coreName);
+ File localSolrConf = new File(tmpDir, "cascading.solr-" + UUID.randomUUID() + "/" + confName);
FileSystem sourceFS = sourcePath.getFileSystem(conf);
- sourceFS.copyToLocalFile(sourcePath, new Path(localSolrCore.getAbsolutePath()));
+ sourceFS.copyToLocalFile(sourcePath, new Path(localSolrConf.getAbsolutePath()));
// Figure out where ultimately the results need to wind up.
_outputPath = new Path(FileOutputFormat.getTaskOutputPath(conf, name), "index");
@@ -62,17 +61,15 @@ public SolrRecordWriter(JobConf conf, String name, Progressable progress) throws
int maxSegments = conf.getInt(MAX_SEGMENTS_KEY, DEFAULT_MAX_SEGMENTS);
- String dataDirPropertyName = conf.get(DATA_DIR_PROPERTY_NAME_KEY);
-
// Set up local Solr home.
- File localSolrHome = SolrSchemeUtil.makeTempSolrHome(localSolrCore);
+ File localSolrHome = SolrSchemeUtil.makeTempSolrHome(localSolrConf, null);
// This is where data will wind up, inside of an index subdir.
_localIndexDir = new File(localSolrHome, "data");
_keepAliveHook = new HadoopKeepAliveHook(progress);
- _solrWriter = new SolrWriter(_keepAliveHook, sinkFields, dataDirPropertyName, _localIndexDir.getAbsolutePath(), localSolrCore, maxSegments) { };
+ _solrWriter = new SolrWriter(_keepAliveHook, sinkFields, _localIndexDir.getAbsolutePath(), localSolrConf, maxSegments) { };
}
@Override
diff --git a/src/main/java/com/scaleunlimited/cascading/scheme/hadoop/SolrScheme.java b/src/main/java/com/scaleunlimited/cascading/scheme/hadoop/SolrScheme.java
index 13a6a9e..f8758a5 100644
--- a/src/main/java/com/scaleunlimited/cascading/scheme/hadoop/SolrScheme.java
+++ b/src/main/java/com/scaleunlimited/cascading/scheme/hadoop/SolrScheme.java
@@ -29,26 +29,20 @@
@SuppressWarnings("serial")
public class SolrScheme extends Scheme, OutputCollector, Object[], Void> {
- private File _solrCoreDir;
+ private File _solrConfDir;
private int _maxSegments;
- private String _dataDirPropertyName;
- public SolrScheme(Fields schemeFields, String solrCoreDir) throws IOException, ParserConfigurationException, SAXException {
- this(schemeFields, solrCoreDir, SolrOutputFormat.DEFAULT_MAX_SEGMENTS);
+ public SolrScheme(Fields schemeFields, String solrConfDir) throws IOException, ParserConfigurationException, SAXException {
+ this(schemeFields, solrConfDir, SolrOutputFormat.DEFAULT_MAX_SEGMENTS);
}
- public SolrScheme(Fields schemeFields, String solrCoreDir, int maxSegments) throws IOException, ParserConfigurationException, SAXException {
- this(schemeFields, solrCoreDir, SolrOutputFormat.DEFAULT_MAX_SEGMENTS, SolrSchemeUtil.DEFAULT_DATA_DIR_PROPERTY_NAME);
- }
-
- public SolrScheme(Fields schemeFields, String solrCoreDir, int maxSegments, String dataDirPropertyName) throws IOException, ParserConfigurationException, SAXException {
+ public SolrScheme(Fields schemeFields, String solrConfDir, int maxSegments) throws IOException, ParserConfigurationException, SAXException {
super(schemeFields, schemeFields);
- _solrCoreDir = new File(solrCoreDir);
+ _solrConfDir = new File(solrConfDir);
_maxSegments = maxSegments;
- _dataDirPropertyName = dataDirPropertyName;
- SolrSchemeUtil.validate(_solrCoreDir, _dataDirPropertyName, schemeFields);
+ SolrSchemeUtil.validate(_solrConfDir, schemeFields);
}
@Override
@@ -70,15 +64,15 @@ public void sourceConfInit(FlowProcess flowProcess, Tap flowProcess, Tap, OutputCollector> tap, JobConf conf) {
// Pick temp location in HDFS for conf files.
// TODO KKr - should I get rid of this temp directory when we're done?
- String coreDirname = _solrCoreDir.getName();
- Path hdfsSolrCoreDir = new Path(Hfs.getTempPath(conf), "solr-core-" + Util.createUniqueID() + "/" + coreDirname);
+ String confDirname = _solrConfDir.getName();
+ Path hdfsSolrConfDir = new Path(Hfs.getTempPath(conf), "solr-conf-" + Util.createUniqueID() + "/" + confDirname);
- // Copy Solr core directory into HDFS.
+ // Copy Solr conf directory into HDFS.
try {
- FileSystem fs = hdfsSolrCoreDir.getFileSystem(conf);
- fs.copyFromLocalFile(new Path(_solrCoreDir.getAbsolutePath()), hdfsSolrCoreDir);
+ FileSystem fs = hdfsSolrConfDir.getFileSystem(conf);
+ fs.copyFromLocalFile(new Path(_solrConfDir.getAbsolutePath()), hdfsSolrConfDir);
} catch (IOException e) {
- throw new TapException("Can't copy Solr core directory into HDFS", e);
+ throw new TapException("Can't copy Solr conf directory into HDFS", e);
}
conf.setOutputKeyClass(Tuple.class);
@@ -91,9 +85,8 @@ public void sinkConfInit(FlowProcess flowProcess, Tap flowProcess, Fields sinkFields, File solrCoreDir, int maxSegments, String dataDirPropertyName, String dataDir) throws IOException {
- super(new LocalKeepAliveHook(flowProcess), sinkFields, dataDirPropertyName, dataDir, solrCoreDir, maxSegments);
+ public SolrCollector(FlowProcess flowProcess, Fields sinkFields, File solrConfDir, int maxSegments, String dataDir) throws IOException {
+ super(new LocalKeepAliveHook(flowProcess), sinkFields, dataDir, solrConfDir, maxSegments);
}
public void collect(Tuple value) throws IOException {
diff --git a/src/main/java/com/scaleunlimited/cascading/scheme/local/SolrScheme.java b/src/main/java/com/scaleunlimited/cascading/scheme/local/SolrScheme.java
index 1e44d4a..5f76cc7 100644
--- a/src/main/java/com/scaleunlimited/cascading/scheme/local/SolrScheme.java
+++ b/src/main/java/com/scaleunlimited/cascading/scheme/local/SolrScheme.java
@@ -26,26 +26,20 @@ public class SolrScheme extends Scheme flowProcess, SinkCall makeSourceTap(Fields fields, String path);
@@ -42,12 +42,10 @@ public abstract class AbstractSolrSchemeTest extends Assert {
protected abstract Tap, ?, ?> makeSolrSink(Fields fields, String path) throws Exception;
protected abstract FlowConnector makeFlowConnector();
- protected abstract Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrCoreDir) throws Exception;
-
- protected abstract Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrCoreDir, int maxSegments) throws Exception;
-
- protected abstract Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrCoreDir, int maxSegments, String dataDirPropertyName) throws Exception;
+ protected abstract Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrConfDir) throws Exception;
+ protected abstract Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrConfDir, int maxSegments) throws Exception;
+
@Before
public void setup() throws IOException {
File outputDir = new File(getTestDir());
@@ -75,7 +73,7 @@ protected void testSchemeChecksBadConf() throws Exception {
protected void testSchemeWrongFields() throws Exception {
try {
// Need to make sure we include the required fields.
- makeScheme(new Fields("id", "bogus-field"), SOLR_CORE_DIR);
+ makeScheme(new Fields("id", "bogus-field"), SOLR_CONF_DIR);
fail("Should have thrown exception");
} catch (TapException e) {
assert(e.getMessage().contains("field name doesn't exist"));
@@ -84,7 +82,7 @@ protected void testSchemeWrongFields() throws Exception {
protected void testSchemeMissingRequiredField() throws Exception {
try {
- makeScheme(new Fields("sku"), SOLR_CORE_DIR);
+ makeScheme(new Fields("sku"), SOLR_CONF_DIR);
fail("Should have thrown exception");
} catch (TapException e) {
assert(e.getMessage().contains("field name for required"));
@@ -95,7 +93,7 @@ protected void testIndexSink() throws Exception {
final Fields testFields = new Fields("id", "name", "price", "inStock");
String out = getTestDir() + "testIndexSink/out";
- DirectoryTap solrSink = new DirectoryTap(new SolrScheme(testFields, SOLR_CORE_DIR), out, SinkMode.REPLACE);
+ DirectoryTap solrSink = new DirectoryTap(new SolrScheme(testFields, SOLR_CONF_DIR), out, SinkMode.REPLACE);
TupleEntryCollector writer = solrSink.openForWrite(new LocalFlowProcess());
@@ -106,15 +104,16 @@ protected void testIndexSink() throws Exception {
writer.close();
}
+ @SuppressWarnings({ "rawtypes", "unchecked" })
protected void testSimpleIndexing() throws Exception {
final Fields testFields = new Fields("id", "name", "price", "cat", "inStock", "image");
- final String in = getTestDir() + "testSimpleIndexing/in";
- final String out = getTestDir() + "testSimpleIndexing/out";
+ final File in = new File(getTestDir() + "testSimpleIndexing/in");
+ final File out = new File(getTestDir() + "testSimpleIndexing/out");
byte[] imageData = new byte[] {0, 1, 2, 3, 5};
- Tap source = makeSourceTap(testFields, in);
+ Tap source = makeSourceTap(testFields, in.getAbsolutePath());
TupleEntryCollector write = source.openForWrite(makeFlowProcess());
Tuple t = new Tuple();
t.add(1);
@@ -141,19 +140,19 @@ protected void testSimpleIndexing() throws Exception {
// Now read from the results, and write to a Solr index.
Pipe writePipe = new Pipe("tuples to Solr");
- Tap solrSink = makeSolrSink(testFields, out);
+ Tap solrSink = makeSolrSink(testFields, out.getAbsolutePath());
Flow flow = makeFlowConnector().connect(source, solrSink, writePipe);
flow.complete();
// Open up the Solr index, and do some searches.
- System.setProperty("solr.data.dir", out + "/part-00000");
+ System.setProperty("solr.data.dir", new File(out,"/part-00000").getAbsolutePath());
CoreContainer coreContainer = new CoreContainer(SOLR_HOME_DIR);
coreContainer.load();
- SolrServer solrServer = new EmbeddedSolrServer(coreContainer, "");
+ EmbeddedSolrServer solrServer = new EmbeddedSolrServer(coreContainer, CORE_NAME);
ModifiableSolrParams params = new ModifiableSolrParams();
- params.set(CommonParams.Q, "turbowriter");
+ params.set(CommonParams.Q, "name:turbowriter");
QueryResponse res = solrServer.query(params);
assertEquals(1, res.getResults().size());
@@ -173,6 +172,8 @@ protected void testSimpleIndexing() throws Exception {
params.set(CommonParams.Q, "bogus");
res = solrServer.query(params);
assertEquals(0, res.getResults().size());
+
+ solrServer.close();
}
diff --git a/src/test/java/com/scaleunlimited/cascading/scheme/hadoop/SolrSchemeHadoopTest.java b/src/test/java/com/scaleunlimited/cascading/scheme/hadoop/SolrSchemeHadoopTest.java
index 2358b7d..272e9f0 100644
--- a/src/test/java/com/scaleunlimited/cascading/scheme/hadoop/SolrSchemeHadoopTest.java
+++ b/src/test/java/com/scaleunlimited/cascading/scheme/hadoop/SolrSchemeHadoopTest.java
@@ -45,23 +45,19 @@ protected FlowProcess> makeFlowProcess() {
}
@Override
- protected Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrCoreDir) throws Exception {
- return new SolrScheme(schemeFields, solrCoreDir);
+ protected Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrConfDir) throws Exception {
+ return new SolrScheme(schemeFields, solrConfDir);
}
@Override
- protected Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrCoreDir, int maxSegments) throws Exception {
- return new SolrScheme(schemeFields, solrCoreDir, maxSegments);
- }
-
- @Override
- protected Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrCoreDir, int maxSegments, String dataDirPropertyName) throws Exception {
- return new SolrScheme(schemeFields, solrCoreDir, maxSegments, dataDirPropertyName);
+ protected Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrConfDir, int maxSegments) throws Exception {
+ return new SolrScheme(schemeFields, solrConfDir, maxSegments);
}
+ @SuppressWarnings({ "rawtypes", "unchecked" })
@Override
protected Tap, ?, ?> makeSolrSink(Fields fields, String path) throws Exception {
- Scheme scheme = new SolrScheme(fields, SOLR_CORE_DIR);
+ Scheme scheme = new SolrScheme(fields, SOLR_CONF_DIR);
return new Hfs(scheme, path, SinkMode.REPLACE);
}
diff --git a/src/test/java/com/scaleunlimited/cascading/scheme/local/SolrSchemeLocalTest.java b/src/test/java/com/scaleunlimited/cascading/scheme/local/SolrSchemeLocalTest.java
index 52d0302..4baf08c 100644
--- a/src/test/java/com/scaleunlimited/cascading/scheme/local/SolrSchemeLocalTest.java
+++ b/src/test/java/com/scaleunlimited/cascading/scheme/local/SolrSchemeLocalTest.java
@@ -1,10 +1,5 @@
package com.scaleunlimited.cascading.scheme.local;
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.commons.io.FileUtils;
-import org.junit.Before;
import org.junit.Test;
import cascading.flow.FlowConnector;
@@ -42,7 +37,7 @@ protected FlowProcess> makeFlowProcess() {
@Override
protected Tap, ?, ?> makeSolrSink(Fields fields, String path) throws Exception {
- return new DirectoryTap(new SolrScheme(fields, SOLR_CORE_DIR), path);
+ return new DirectoryTap(new SolrScheme(fields, SOLR_CONF_DIR), path);
}
@Override
@@ -51,18 +46,13 @@ protected FlowConnector makeFlowConnector() {
}
@Override
- protected cascading.scheme.Scheme,?,?,?,?> makeScheme(Fields schemeFields, String solrCoreDir) throws Exception {
- return new SolrScheme(schemeFields, solrCoreDir);
- }
-
- @Override
- protected Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrCoreDir, int maxSegments) throws Exception {
- return new SolrScheme(schemeFields, solrCoreDir, maxSegments);
+ protected cascading.scheme.Scheme,?,?,?,?> makeScheme(Fields schemeFields, String solrConfDir) throws Exception {
+ return new SolrScheme(schemeFields, solrConfDir);
}
@Override
- protected Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrCoreDir, int maxSegments, String dataDirPropertyName) throws Exception {
- return new SolrScheme(schemeFields, solrCoreDir, maxSegments, dataDirPropertyName);
+ protected Scheme, ?, ?, ?, ?> makeScheme(Fields schemeFields, String solrConfDir, int maxSegments) throws Exception {
+ return new SolrScheme(schemeFields, solrConfDir, maxSegments);
}
@Test
diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties
new file mode 100644
index 0000000..3b687e3
--- /dev/null
+++ b/src/test/resources/log4j.properties
@@ -0,0 +1,71 @@
+# Copyright 2010-2013 Scale Unlimited.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Define some default values that can be overridden by system properties
+my.root.level=INFO
+my.http.level=INFO
+my.appender=console
+
+# Additional settings (root logger, console pattern, log dir) that can also be overridden by system properties
+my.root.logger=${my.root.level},${my.appender}
+my.console.pattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}:%L - %m%n
+my.http.level=INFO
+my.log.dir=./logs
+
+
+# Define the root logger to the system property "my.root.logger".
+log4j.rootLogger=${my.root.logger}
+
+# Logging Threshold
+log4j.threshhold=ALL
+
+#
+# console
+# Add "console" to my.root.logger above if you want to use this
+#
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.target=System.out
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=${my.console.pattern}
+
+# Custom Logging levels
+log4j.logger.org.apache.hadoop=WARN
+
+# Log from TupleLogger
+log4j.logger.com.scaleunlimited.cascading.TupleLogger=DEBUG
+
+# Allow hiding of wire output even when running at TRACE levels
+log4j.logger.org.apache.http.wire=${my.http.level}
+log4j.logger.org.apache.http.headers=${my.http.level}
+log4j.logger.org.apache.http=${my.http.level}
+
+# Hide HttpClient 3.1 output (from Heritrix archive reader code)
+log4j.logger.httpclient.wire.header=${my.http.level}
+
+# Hide a bunch of useless output that we get at the DEBUG level. These should
+# probably all be using trace(), not debug() to log their output.
+log4j.logger.cascading.flow.stack.FlowMapperStack=INFO
+log4j.logger.cascading.pipe.Each=INFO
+log4j.logger.cascading.pipe.Group=INFO
+log4j.logger.org.apache.hadoop.conf.Configuration=INFO
+log4j.logger.org.apache.http.impl.conn.tsccm.RefQueueWorker=INFO
+log4j.logger.org.mortbay.util.ThreadedServer=INFO
+
+# We get lots of warnings due to JMX config issues when running with mini-clusters
+log4j.logger.org.apache.hadoop.metrics2.util.MBeans=ERROR
+
+# Ignore warning about no native code.
+log4j.logger.org.apache.hadoop.util.NativeCodeLoader=ERROR
+
+#log4j.logger.org.apache.hadoop.fs.FSNamesystem=DEBUG
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/_rest_managed.json b/src/test/resources/solr-home-4.1/collection1/conf/_rest_managed.json
deleted file mode 100644
index e7ada3f..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/_rest_managed.json
+++ /dev/null
@@ -1,3 +0,0 @@
-{
- "initArgs":{},
- "managedList":[]}
\ No newline at end of file
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/admin-extra.html b/src/test/resources/solr-home-4.1/collection1/conf/admin-extra.html
deleted file mode 100644
index 21b5090..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/admin-extra.html
+++ /dev/null
@@ -1,31 +0,0 @@
-
-
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/admin-extra.menu-bottom.html b/src/test/resources/solr-home-4.1/collection1/conf/admin-extra.menu-bottom.html
deleted file mode 100644
index 9d9e1a5..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/admin-extra.menu-bottom.html
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/admin-extra.menu-top.html b/src/test/resources/solr-home-4.1/collection1/conf/admin-extra.menu-top.html
deleted file mode 100644
index 4f694a0..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/admin-extra.menu-top.html
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/currency.xml b/src/test/resources/solr-home-4.1/collection1/conf/currency.xml
deleted file mode 100644
index 5c6cfaf..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/currency.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/elevate.xml b/src/test/resources/solr-home-4.1/collection1/conf/elevate.xml
deleted file mode 100644
index e4769cc..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/elevate.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_ca.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_ca.txt
deleted file mode 100644
index 116644f..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_ca.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Set of Catalan contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-l
-m
-n
-s
-t
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_fr.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_fr.txt
deleted file mode 100644
index 6de73c2..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_fr.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-# Set of French contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-l
-m
-t
-qu
-n
-s
-j
-d
-c
-jusqu
-quoiqu
-lorsqu
-puisqu
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_ga.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_ga.txt
deleted file mode 100644
index 849b0c6..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-m
-b
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_it.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_it.txt
deleted file mode 100644
index 790b5af..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/contractions_it.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# Set of Italian contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-c
-l
-all
-dall
-dell
-nell
-sull
-coll
-pell
-gl
-agl
-dagl
-degl
-negl
-sugl
-un
-m
-t
-s
-v
-d
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/hyphenations_ga.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/hyphenations_ga.txt
deleted file mode 100644
index fce4100..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/hyphenations_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish hyphenations for StopFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-h
-n
-t
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stemdict_nl.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stemdict_nl.txt
deleted file mode 100644
index 01dd3bb..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stemdict_nl.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# Set of overrides for the dutch stemmer
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-fiets fiets
-bromfiets bromfiets
-ei eier
-kind kinder
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stoptags_ja.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stoptags_ja.txt
deleted file mode 100644
index 2faf2a6..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stoptags_ja.txt
+++ /dev/null
@@ -1,420 +0,0 @@
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token with a part-of-speech tag that exactly matches those defined in this
-# file are removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below. Note that comments are
-# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building you own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-# noun: unclassified nouns
-#名詞
-#
-# noun-common: Common nouns or nouns where the sub-classification is undefined
-#名詞-一般
-#
-# noun-proper: Proper nouns where the sub-classification is undefined
-#名詞-固有名詞
-#
-# noun-proper-misc: miscellaneous proper nouns
-#名詞-固有名詞-一般
-#
-# noun-proper-person: Personal names where the sub-classification is undefined
-#名詞-固有名詞-人名
-#
-# noun-proper-person-misc: names that cannot be divided into surname and
-# given name; foreign names; names where the surname or given name is unknown.
-# e.g. お市の方
-#名詞-固有名詞-人名-一般
-#
-# noun-proper-person-surname: Mainly Japanese surnames.
-# e.g. 山田
-#名詞-固有名詞-人名-姓
-#
-# noun-proper-person-given_name: Mainly Japanese given names.
-# e.g. 太郎
-#名詞-固有名詞-人名-名
-#
-# noun-proper-organization: Names representing organizations.
-# e.g. 通産省, NHK
-#名詞-固有名詞-組織
-#
-# noun-proper-place: Place names where the sub-classification is undefined
-#名詞-固有名詞-地域
-#
-# noun-proper-place-misc: Place names excluding countries.
-# e.g. アジア, バルセロナ, 京都
-#名詞-固有名詞-地域-一般
-#
-# noun-proper-place-country: Country names.
-# e.g. 日本, オーストラリア
-#名詞-固有名詞-地域-国
-#
-# noun-pronoun: Pronouns where the sub-classification is undefined
-#名詞-代名詞
-#
-# noun-pronoun-misc: miscellaneous pronouns:
-# e.g. それ, ここ, あいつ, あなた, あちこち, いくつ, どこか, なに, みなさん, みんな, わたくし, われわれ
-#名詞-代名詞-一般
-#
-# noun-pronoun-contraction: Spoken language contraction made by combining a
-# pronoun and the particle 'wa'.
-# e.g. ありゃ, こりゃ, こりゃあ, そりゃ, そりゃあ
-#名詞-代名詞-縮約
-#
-# noun-adverbial: Temporal nouns such as names of days or months that behave
-# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-# e.g. 金曜, 一月, 午後, 少量
-#名詞-副詞可能
-#
-# noun-verbal: Nouns that take arguments with case and can appear followed by
-# 'suru' and related verbs (する, できる, なさる, くださる)
-# e.g. インプット, 愛着, 悪化, 悪戦苦闘, 一安心, 下取り
-#名詞-サ変接続
-#
-# noun-adjective-base: The base form of adjectives, words that appear before な ("na")
-# e.g. 健康, 安易, 駄目, だめ
-#名詞-形容動詞語幹
-#
-# noun-numeric: Arabic numbers, Chinese numerals, and counters like 何 (回), 数.
-# e.g. 0, 1, 2, 何, 数, 幾
-#名詞-数
-#
-# noun-affix: noun affixes where the sub-classification is undefined
-#名詞-非自立
-#
-# noun-affix-misc: Of adnominalizers, the case-marker の ("no"), and words that
-# attach to the base form of inflectional words, words that cannot be classified
-# into any of the other categories below. This category includes indefinite nouns.
-# e.g. あかつき, 暁, かい, 甲斐, 気, きらい, 嫌い, くせ, 癖, こと, 事, ごと, 毎, しだい, 次第,
-# 順, せい, 所為, ついで, 序で, つもり, 積もり, 点, どころ, の, はず, 筈, はずみ, 弾み,
-# 拍子, ふう, ふり, 振り, ほう, 方, 旨, もの, 物, 者, ゆえ, 故, ゆえん, 所以, わけ, 訳,
-# わり, 割り, 割, ん-口語/, もん-口語/
-#名詞-非自立-一般
-#
-# noun-affix-adverbial: noun affixes that that can behave as adverbs.
-# e.g. あいだ, 間, あげく, 挙げ句, あと, 後, 余り, 以外, 以降, 以後, 以上, 以前, 一方, うえ,
-# 上, うち, 内, おり, 折り, かぎり, 限り, きり, っきり, 結果, ころ, 頃, さい, 際, 最中, さなか,
-# 最中, じたい, 自体, たび, 度, ため, 為, つど, 都度, とおり, 通り, とき, 時, ところ, 所,
-# とたん, 途端, なか, 中, のち, 後, ばあい, 場合, 日, ぶん, 分, ほか, 他, まえ, 前, まま,
-# 儘, 侭, みぎり, 矢先
-#名詞-非自立-副詞可能
-#
-# noun-affix-aux: noun affixes treated as 助動詞 ("auxiliary verb") in school grammars
-# with the stem よう(だ) ("you(da)").
-# e.g. よう, やう, 様 (よう)
-#名詞-非自立-助動詞語幹
-#
-# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-# connection form な (aux "da").
-# e.g. みたい, ふう
-#名詞-非自立-形容動詞語幹
-#
-# noun-special: special nouns where the sub-classification is undefined.
-#名詞-特殊
-#
-# noun-special-aux: The そうだ ("souda") stem form that is used for reporting news, is
-# treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the base
-# form of inflectional words.
-# e.g. そう
-#名詞-特殊-助動詞語幹
-#
-# noun-suffix: noun suffixes where the sub-classification is undefined.
-#名詞-接尾
-#
-# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
-# to ガル or タイ and can combine into compound nouns, words that cannot be classified into
-# any of the other categories below. In general, this category is more inclusive than
-# 接尾語 ("suffix") and is usually the last element in a compound noun.
-# e.g. おき, かた, 方, 甲斐 (がい), がかり, ぎみ, 気味, ぐるみ, (~した) さ, 次第, 済 (ず) み,
-# よう, (でき)っこ, 感, 観, 性, 学, 類, 面, 用
-#名詞-接尾-一般
-#
-# noun-suffix-person: Suffixes that form nouns and attach to person names more often
-# than other nouns.
-# e.g. 君, 様, 著
-#名詞-接尾-人名
-#
-# noun-suffix-place: Suffixes that form nouns and attach to place names more often
-# than other nouns.
-# e.g. 町, 市, 県
-#名詞-接尾-地域
-#
-# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
-# can appear before スル ("suru").
-# e.g. 化, 視, 分け, 入り, 落ち, 買い
-#名詞-接尾-サ変接続
-#
-# noun-suffix-aux: The stem form of そうだ (様態) that is used to indicate conditions,
-# is treated as 助動詞 ("auxiliary verb") in school grammars, and attach to the
-# conjunctive form of inflectional words.
-# e.g. そう
-#名詞-接尾-助動詞語幹
-#
-# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
-# form of inflectional words and appear before the copula だ ("da").
-# e.g. 的, げ, がち
-#名詞-接尾-形容動詞語幹
-#
-# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-# e.g. 後 (ご), 以後, 以降, 以前, 前後, 中, 末, 上, 時 (じ)
-#名詞-接尾-副詞可能
-#
-# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
-# is more inclusive than 助数詞 ("classifier") and includes common nouns that attach
-# to numbers.
-# e.g. 個, つ, 本, 冊, パーセント, cm, kg, カ月, か国, 区画, 時間, 時半
-#名詞-接尾-助数詞
-#
-# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-# e.g. (楽し) さ, (考え) 方
-#名詞-接尾-特殊
-#
-# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
-# together.
-# e.g. (日本) 対 (アメリカ), 対 (アメリカ), (3) 対 (5), (女優) 兼 (主婦)
-#名詞-接続詞的
-#
-# noun-verbal_aux: Nouns that attach to the conjunctive particle て ("te") and are
-# semantically verb-like.
-# e.g. ごらん, ご覧, 御覧, 頂戴
-#名詞-動詞非自立的
-#
-# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
-# dialects, English, etc. Currently, the only entry for 名詞 引用文字列 ("noun quotation")
-# is いわく ("iwaku").
-#名詞-引用文字列
-#
-# noun-nai_adjective: Words that appear before the auxiliary verb ない ("nai") and
-# behave like an adjective.
-# e.g. 申し訳, 仕方, とんでも, 違い
-#名詞-ナイ形容詞語幹
-#
-#####
-# prefix: unclassified prefixes
-#接頭詞
-#
-# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
-# excluding numerical expressions.
-# e.g. お (水), 某 (氏), 同 (社), 故 (~氏), 高 (品質), お (見事), ご (立派)
-#接頭詞-名詞接続
-#
-# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-# in conjunctive form followed by なる/なさる/くださる.
-# e.g. お (読みなさい), お (座り)
-#接頭詞-動詞接続
-#
-# prefix-adjectival: Prefixes that attach to adjectives.
-# e.g. お (寒いですねえ), バカ (でかい)
-#接頭詞-形容詞接続
-#
-# prefix-numerical: Prefixes that attach to numerical expressions.
-# e.g. 約, およそ, 毎時
-#接頭詞-数接続
-#
-#####
-# verb: unclassified verbs
-#動詞
-#
-# verb-main:
-#動詞-自立
-#
-# verb-auxiliary:
-#動詞-非自立
-#
-# verb-suffix:
-#動詞-接尾
-#
-#####
-# adjective: unclassified adjectives
-#形容詞
-#
-# adjective-main:
-#形容詞-自立
-#
-# adjective-auxiliary:
-#形容詞-非自立
-#
-# adjective-suffix:
-#形容詞-接尾
-#
-#####
-# adverb: unclassified adverbs
-#副詞
-#
-# adverb-misc: Words that can be segmented into one unit and where adnominal
-# modification is not possible.
-# e.g. あいかわらず, 多分
-#副詞-一般
-#
-# adverb-particle_conjunction: Adverbs that can be followed by の, は, に,
-# な, する, だ, etc.
-# e.g. こんなに, そんなに, あんなに, なにか, なんでも
-#副詞-助詞類接続
-#
-#####
-# adnominal: Words that only have noun-modifying forms.
-# e.g. この, その, あの, どの, いわゆる, なんらかの, 何らかの, いろんな, こういう, そういう, ああいう,
-# どういう, こんな, そんな, あんな, どんな, 大きな, 小さな, おかしな, ほんの, たいした,
-# 「(, も) さる (ことながら)」, 微々たる, 堂々たる, 単なる, いかなる, 我が」「同じ, 亡き
-#連体詞
-#
-#####
-# conjunction: Conjunctions that can occur independently.
-# e.g. が, けれども, そして, じゃあ, それどころか
-接続詞
-#
-#####
-# particle: unclassified particles.
-助詞
-#
-# particle-case: case particles where the subclassification is undefined.
-助詞-格助詞
-#
-# particle-case-misc: Case particles.
-# e.g. から, が, で, と, に, へ, より, を, の, にて
-助詞-格助詞-一般
-#
-# particle-case-quote: the "to" that appears after nouns, a person’s speech,
-# quotation marks, expressions of decisions from a meeting, reasons, judgements,
-# conjectures, etc.
-# e.g. ( だ) と (述べた.), ( である) と (して執行猶予...)
-助詞-格助詞-引用
-#
-# particle-case-compound: Compounds of particles and verbs that mainly behave
-# like case particles.
-# e.g. という, といった, とかいう, として, とともに, と共に, でもって, にあたって, に当たって, に当って,
-# にあたり, に当たり, に当り, に当たる, にあたる, において, に於いて,に於て, における, に於ける,
-# にかけ, にかけて, にかんし, に関し, にかんして, に関して, にかんする, に関する, に際し,
-# に際して, にしたがい, に従い, に従う, にしたがって, に従って, にたいし, に対し, にたいして,
-# に対して, にたいする, に対する, について, につき, につけ, につけて, につれ, につれて, にとって,
-# にとり, にまつわる, によって, に依って, に因って, により, に依り, に因り, による, に依る, に因る,
-# にわたって, にわたる, をもって, を以って, を通じ, を通じて, を通して, をめぐって, をめぐり, をめぐる,
-# って-口語/, ちゅう-関西弁「という」/, (何) ていう (人)-口語/, っていう-口語/, といふ, とかいふ
-助詞-格助詞-連語
-#
-# particle-conjunctive:
-# e.g. から, からには, が, けれど, けれども, けど, し, つつ, て, で, と, ところが, どころか, とも, ども,
-# ながら, なり, ので, のに, ば, ものの, や ( した), やいなや, (ころん) じゃ(いけない)-口語/,
-# (行っ) ちゃ(いけない)-口語/, (言っ) たって (しかたがない)-口語/, (それがなく)ったって (平気)-口語/
-助詞-接続助詞
-#
-# particle-dependency:
-# e.g. こそ, さえ, しか, すら, は, も, ぞ
-助詞-係助詞
-#
-# particle-adverbial:
-# e.g. がてら, かも, くらい, 位, ぐらい, しも, (学校) じゃ(これが流行っている)-口語/,
-# (それ)じゃあ (よくない)-口語/, ずつ, (私) なぞ, など, (私) なり (に), (先生) なんか (大嫌い)-口語/,
-# (私) なんぞ, (先生) なんて (大嫌い)-口語/, のみ, だけ, (私) だって-口語/, だに,
-# (彼)ったら-口語/, (お茶) でも (いかが), 等 (とう), (今後) とも, ばかり, ばっか-口語/, ばっかり-口語/,
-# ほど, 程, まで, 迄, (誰) も (が)([助詞-格助詞] および [助詞-係助詞] の前に位置する「も」)
-助詞-副助詞
-#
-# particle-interjective: particles with interjective grammatical roles.
-# e.g. (松島) や
-助詞-間投助詞
-#
-# particle-coordinate:
-# e.g. と, たり, だの, だり, とか, なり, や, やら
-助詞-並立助詞
-#
-# particle-final:
-# e.g. かい, かしら, さ, ぜ, (だ)っけ-口語/, (とまってる) で-方言/, な, ナ, なあ-口語/, ぞ, ね, ネ,
-# ねぇ-口語/, ねえ-口語/, ねん-方言/, の, のう-口語/, や, よ, ヨ, よぉ-口語/, わ, わい-口語/
-助詞-終助詞
-#
-# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
-# adverbial, conjunctive, or sentence final. For example:
-# (a) 「A か B か」. Ex:「(国内で運用する) か,(海外で運用する) か (.)」
-# (b) Inside an adverb phrase. Ex:「(幸いという) か (, 死者はいなかった.)」
-# 「(祈りが届いたせい) か (, 試験に合格した.)」
-# (c) 「かのように」. Ex:「(何もなかった) か (のように振る舞った.)」
-# e.g. か
-助詞-副助詞/並立助詞/終助詞
-#
-# particle-adnominalizer: The "no" that attaches to nouns and modifies
-# non-inflectional words.
-助詞-連体化
-#
-# particle-adnominalizer: The "ni" and "to" that appear following nouns and adverbs
-# that are giongo, giseigo, or gitaigo.
-# e.g. に, と
-助詞-副詞化
-#
-# particle-special: A particle that does not fit into one of the above classifications.
-# This includes particles that are used in Tanka, Haiku, and other poetry.
-# e.g. かな, けむ, ( しただろう) に, (あんた) にゃ(わからん), (俺) ん (家)
-助詞-特殊
-#
-#####
-# auxiliary-verb:
-助動詞
-#
-#####
-# interjection: Greetings and other exclamations.
-# e.g. おはよう, おはようございます, こんにちは, こんばんは, ありがとう, どうもありがとう, ありがとうございます,
-# いただきます, ごちそうさま, さよなら, さようなら, はい, いいえ, ごめん, ごめんなさい
-#感動詞
-#
-#####
-# symbol: unclassified Symbols.
-記号
-#
-# symbol-misc: A general symbol not in one of the categories below.
-# e.g. [○◎@$〒→+]
-記号-一般
-#
-# symbol-comma: Commas
-# e.g. [,、]
-記号-読点
-#
-# symbol-period: Periods and full stops.
-# e.g. [..。]
-記号-句点
-#
-# symbol-space: Full-width whitespace.
-記号-空白
-#
-# symbol-open_bracket:
-# e.g. [({‘“『【]
-記号-括弧開
-#
-# symbol-close_bracket:
-# e.g. [)}’”』」】]
-記号-括弧閉
-#
-# symbol-alphabetic:
-#記号-アルファベット
-#
-#####
-# other: unclassified other
-#その他
-#
-# other-interjection: Words that are hard to classify as noun-suffixes or
-# sentence-final particles.
-# e.g. (だ)ァ
-その他-間投
-#
-#####
-# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-# e.g. あの, うんと, えと
-フィラー
-#
-#####
-# non-verbal: non-verbal sound.
-非言語音
-#
-#####
-# fragment:
-#語断片
-#
-#####
-# unknown: unknown part of speech.
-#未知語
-#
-##### End of file
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ar.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ar.txt
deleted file mode 100644
index aca2430..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ar.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some
-# redundant entries, for example containing forms with both أ and ا
-من
-ومن
-منها
-منه
-في
-وفي
-فيها
-فيه
-و
-ف
-ثم
-او
-أو
-ب
-بها
-به
-ا
-أ
-اى
-اي
-أي
-أى
-لا
-ولا
-الا
-ألا
-إلا
-لكن
-ما
-وما
-كما
-فما
-عن
-مع
-اذا
-إذا
-ان
-أن
-إن
-انها
-أنها
-إنها
-انه
-أنه
-إنه
-بان
-بأن
-فان
-فأن
-وان
-وأن
-وإن
-التى
-التي
-الذى
-الذي
-الذين
-الى
-الي
-إلى
-إلي
-على
-عليها
-عليه
-اما
-أما
-إما
-ايضا
-أيضا
-كل
-وكل
-لم
-ولم
-لن
-ولن
-هى
-هي
-هو
-وهى
-وهي
-وهو
-فهى
-فهي
-فهو
-انت
-أنت
-لك
-لها
-له
-هذه
-هذا
-تلك
-ذلك
-هناك
-كانت
-كان
-يكون
-تكون
-وكانت
-وكان
-غير
-بعض
-قد
-نحو
-بين
-بينما
-منذ
-ضمن
-حيث
-الان
-الآن
-خلال
-بعد
-قبل
-حتى
-عند
-عندما
-لدى
-جميع
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_bg.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_bg.txt
deleted file mode 100644
index 842ee8a..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_bg.txt
+++ /dev/null
@@ -1,193 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-а
-аз
-ако
-ала
-бе
-без
-беше
-би
-бил
-била
-били
-било
-близо
-бъдат
-бъде
-бяха
-в
-вас
-ваш
-ваша
-вероятно
-вече
-взема
-ви
-вие
-винаги
-все
-всеки
-всички
-всичко
-всяка
-във
-въпреки
-върху
-г
-ги
-главно
-го
-д
-да
-дали
-до
-докато
-докога
-дори
-досега
-доста
-е
-едва
-един
-ето
-за
-зад
-заедно
-заради
-засега
-затова
-защо
-защото
-и
-из
-или
-им
-има
-имат
-иска
-й
-каза
-как
-каква
-какво
-както
-какъв
-като
-кога
-когато
-което
-които
-кой
-който
-колко
-която
-къде
-където
-към
-ли
-м
-ме
-между
-мен
-ми
-мнозина
-мога
-могат
-може
-моля
-момента
-му
-н
-на
-над
-назад
-най
-направи
-напред
-например
-нас
-не
-него
-нея
-ни
-ние
-никой
-нито
-но
-някои
-някой
-няма
-обаче
-около
-освен
-особено
-от
-отгоре
-отново
-още
-пак
-по
-повече
-повечето
-под
-поне
-поради
-после
-почти
-прави
-пред
-преди
-през
-при
-пък
-първо
-с
-са
-само
-се
-сега
-си
-скоро
-след
-сме
-според
-сред
-срещу
-сте
-съм
-със
-също
-т
-тази
-така
-такива
-такъв
-там
-твой
-те
-тези
-ти
-тн
-то
-това
-тогава
-този
-той
-толкова
-точно
-трябва
-тук
-тъй
-тя
-тях
-у
-харесва
-ч
-че
-често
-чрез
-ще
-щом
-я
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ca.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ca.txt
deleted file mode 100644
index e618af5..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ca.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ací
-ah
-així
-això
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-allà
-allí
-allò
-altra
-altre
-altres
-amb
-ambdós
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aquí
-baix
-cada
-cadascú
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-després
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-érem
-eren
-éreu
-es
-és
-esta
-està
-estàvem
-estaven
-estàveu
-esteu
-et
-etc
-ets
-fins
-fora
-gairebé
-ha
-han
-has
-havia
-he
-hem
-heu
-hi
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-més
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-només
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-però
-perquè
-poc
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant
-que
-què
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-sóc
-solament
-sols
-son
-són
-sons
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-també
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_cz.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_cz.txt
deleted file mode 100644
index e928df9..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_cz.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-tímto
-budeš
-budem
-byli
-jseš
-můj
-svým
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-proč
-máte
-tato
-kam
-tohoto
-kdo
-kteří
-mi
-nám
-tom
-tomuto
-mít
-nic
-proto
-kterou
-byla
-toho
-protože
-asi
-ho
-naši
-napište
-re
-což
-tím
-takže
-svých
-její
-svými
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-pravé
-ji
-nad
-nejsou
-či
-pod
-téma
-mezi
-přes
-ty
-pak
-vám
-ani
-když
-však
-neg
-jsem
-tento
-článku
-články
-aby
-jsme
-před
-pta
-jejich
-byl
-ještě
-až
-bez
-také
-pouze
-první
-vaše
-která
-nás
-nový
-tipy
-pokud
-může
-strana
-jeho
-své
-jiné
-zprávy
-nové
-není
-vás
-jen
-podle
-zde
-už
-být
-více
-bude
-již
-než
-který
-by
-které
-co
-nebo
-ten
-tak
-má
-při
-od
-po
-jsou
-jak
-další
-ale
-si
-se
-ve
-to
-jako
-za
-zpět
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-přičemž
-já
-on
-ona
-ono
-oni
-ony
-my
-vy
-jí
-ji
-mě
-mne
-jemu
-tomu
-těm
-těmu
-němu
-němuž
-jehož
-jíž
-jelikož
-jež
-jakož
-načež
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_da.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_da.txt
deleted file mode 100644
index 5f28f85..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_da.txt
+++ /dev/null
@@ -1,108 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og | and
-i | in
-jeg | I
-det | that (dem. pronoun)/it (pers. pronoun)
-at | that (in front of a sentence)/to (with infinitive)
-en | a/an
-den | it (pers. pronoun)/that (dem. pronoun)
-til | to/at/for/until/against/by/of/into, more
-er | present tense of "to be"
-som | who, as
-på | on/upon/in/on/at/to/after/of/with/for, on
-de | they
-med | with/by/in, along
-han | he
-af | of/by/from/off/for/in/with/on, off
-for | at/for/to/from/by/of/ago, in front/before, because
-ikke | not
-der | who/which, there/those
-var | past tense of "to be"
-mig | me/myself
-sig | oneself/himself/herself/itself/themselves
-men | but
-et | a/an/one, one (number), someone/somebody/one
-har | present tense of "to have"
-om | round/about/for/in/a, about/around/down, if
-vi | we
-min | my
-havde | past tense of "to have"
-ham | him
-hun | she
-nu | now
-over | over/above/across/by/beyond/past/on/about, over/past
-da | then, when/as/since
-fra | from/off/since, off, since
-du | you
-ud | out
-sin | his/her/its/one's
-dem | them
-os | us/ourselves
-op | up
-man | you/one
-hans | his
-hvor | where
-eller | or
-hvad | what
-skal | must/shall etc.
-selv | myself/youself/herself/ourselves etc., even
-her | here
-alle | all/everyone/everybody etc.
-vil | will (verb)
-blev | past tense of "to stay/to remain/to get/to become"
-kunne | could
-ind | in
-når | when
-være | present tense of "to be"
-dog | however/yet/after all
-noget | something
-ville | would
-jo | you know/you see (adv), yes
-deres | their/theirs
-efter | after/behind/according to/for/by/from, later/afterwards
-ned | down
-skulle | should
-denne | this
-end | than
-dette | this
-mit | my/mine
-også | also
-under | under/beneath/below/during, below/underneath
-have | have
-dig | you
-anden | other
-hende | her
-mine | my
-alt | everything
-meget | much/very, plenty of
-sit | his, her, its, one's
-sine | his, her, its, one's
-vor | our
-mod | against
-disse | these
-hvis | if
-din | your/yours
-nogle | some
-hos | by/at
-blive | be/become
-mange | many
-ad | by/through
-bliver | present tense of "to be/to become"
-hendes | her/hers
-været | be
-thi | for (conj)
-jer | you
-sådan | such, like this/like that
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_de.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_de.txt
deleted file mode 100644
index fe27a53..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_de.txt
+++ /dev/null
@@ -1,292 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber | but
-
-alle | all
-allem
-allen
-aller
-alles
-
-als | than, as
-also | so
-am | an + dem
-an | at
-
-ander | other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch | also
-auf | on
-aus | out of
-bei | by
-bin | am
-bis | until
-bist | art
-da | there
-damit | with it
-dann | then
-
-der | the
-den
-des
-dem
-die
-das
-
-daß | that
-
-derselbe | the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu | to that
-
-dein | thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn | because
-
-derer | of those
-dessen | of him
-
-dich | thee
-dir | to thee
-du | thou
-
-dies | this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch | (several meanings)
-dort | (over) there
-
-
-durch | through
-
-ein | a
-eine
-einem
-einen
-einer
-eines
-
-einig | some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal | once
-
-er | he
-ihn | him
-ihm | to him
-
-es | it
-etwas | something
-
-euer | your
-eure
-eurem
-euren
-eurer
-eures
-
-für | for
-gegen | towards
-gewesen | p.p. of sein
-hab | have
-habe | have
-haben | have
-hat | has
-hatte | had
-hatten | had
-hier | here
-hin | there
-hinter | behind
-
-ich | I
-mich | me
-mir | to me
-
-
-ihr | you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch | to you
-
-im | in + dem
-in | in
-indem | while
-ins | in + das
-ist | is
-
-jede | each, every
-jedem
-jeden
-jeder
-jedes
-
-jene | that
-jenem
-jenen
-jener
-jenes
-
-jetzt | now
-kann | can
-
-kein | no
-keine
-keinem
-keinen
-keiner
-keines
-
-können | can
-könnte | could
-machen | do
-man | one
-
-manche | some, many a
-manchem
-manchen
-mancher
-manches
-
-mein | my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit | with
-muss | must
-musste | had to
-nach | to(wards)
-nicht | not
-nichts | nothing
-noch | still, yet
-nun | now
-nur | only
-ob | whether
-oder | or
-ohne | without
-sehr | very
-
-sein | his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst | self
-sich | herself
-
-sie | they, she
-ihnen | to them
-
-sind | are
-so | so
-
-solche | such
-solchem
-solchen
-solcher
-solches
-
-soll | shall
-sollte | should
-sondern | but
-sonst | else
-über | over
-um | about, around
-und | and
-
-uns | us
-unse
-unsem
-unsen
-unser
-unses
-
-unter | under
-viel | much
-vom | von + dem
-von | from
-vor | before
-während | while
-war | was
-waren | were
-warst | wast
-was | what
-weg | away, off
-weil | because
-weiter | further
-
-welche | which
-welchem
-welchen
-welcher
-welches
-
-wenn | when
-werde | will
-werden | will
-wie | how
-wieder | again
-will | want
-wir | we
-wird | will
-wirst | willst
-wo | where
-wollen | want
-wollte | wanted
-würde | would
-würden | would
-zu | to
-zum | zu + dem
-zur | zu + der
-zwar | indeed
-zwischen | between
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_el.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_el.txt
deleted file mode 100644
index a5abfbe..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_el.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use 'σ' instead of 'ς'
-ο
-η
-το
-οι
-τα
-του
-τησ
-των
-τον
-την
-και
-κι
-κ
-ειμαι
-εισαι
-ειναι
-ειμαστε
-ειστε
-στο
-στον
-στη
-στην
-μα
-αλλα
-απο
-για
-προσ
-με
-σε
-ωσ
-παρα
-αντι
-κατα
-μετα
-θα
-να
-δε
-δεν
-μη
-μην
-επι
-ενω
-εαν
-αν
-τοτε
-που
-πωσ
-ποιοσ
-ποια
-ποιο
-ποιοι
-ποιεσ
-ποιων
-ποιουσ
-αυτοσ
-αυτη
-αυτο
-αυτοι
-αυτων
-αυτουσ
-αυτεσ
-αυτα
-εκεινοσ
-εκεινη
-εκεινο
-εκεινοι
-εκεινεσ
-εκεινα
-εκεινων
-εκεινουσ
-οπωσ
-ομωσ
-ισωσ
-οσο
-οτι
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_es.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_es.txt
deleted file mode 100644
index ba3463d..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_es.txt
+++ /dev/null
@@ -1,354 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Spanish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | from, of
-la | the, her
-que | who, that
-el | the
-en | in
-y | and
-a | to
-los | the, them
-del | de + el
-se | himself, from him etc
-las | the, them
-por | for, by, etc
-un | a
-para | for
-con | with
-no | no
-una | a
-su | his, her
-al | a + el
- | es from SER
-lo | him
-como | how
-más | more
-pero | pero
-sus | su plural
-le | to him, her
-ya | already
-o | or
- | fue from SER
-este | this
- | ha from HABER
-sí | himself etc
-porque | because
-esta | this
- | son from SER
-entre | between
- | está from ESTAR
-cuando | when
-muy | very
-sin | without
-sobre | on
- | ser from SER
- | tiene from TENER
-también | also
-me | me
-hasta | until
-hay | there is/are
-donde | where
- | han from HABER
-quien | whom, that
- | están from ESTAR
- | estado from ESTAR
-desde | from
-todo | all
-nos | us
-durante | during
- | estados from ESTAR
-todos | all
-uno | a
-les | to them
-ni | nor
-contra | against
-otros | other
- | fueron from SER
-ese | that
-eso | that
- | había from HABER
-ante | before
-ellos | they
-e | and (variant of y)
-esto | this
-mí | me
-antes | before
-algunos | some
-qué | what?
-unos | a
-yo | I
-otro | other
-otras | other
-otra | other
-él | he
-tanto | so much, many
-esa | that
-estos | these
-mucho | much, many
-quienes | who
-nada | nothing
-muchos | many
-cual | who
- | sea from SER
-poco | few
-ella | she
-estar | to be
- | haber from HABER
-estas | these
- | estaba from ESTAR
- | estamos from ESTAR
-algunas | some
-algo | something
-nosotros | we
-
- | other forms
-
-mi | me
-mis | mi plural
-tú | thou
-te | thee
-ti | thee
-tu | thy
-tus | tu plural
-ellas | they
-nosotras | we
-vosotros | you
-vosotras | you
-os | you
-mío | mine
-mía |
-míos |
-mías |
-tuyo | thine
-tuya |
-tuyos |
-tuyas |
-suyo | his, hers, theirs
-suya |
-suyos |
-suyas |
-nuestro | ours
-nuestra |
-nuestros |
-nuestras |
-vuestro | yours
-vuestra |
-vuestros |
-vuestras |
-esos | those
-esas | those
-
- | forms of estar, to be (not including the infinitive):
-estoy
-estás
-está
-estamos
-estáis
-están
-esté
-estés
-estemos
-estéis
-estén
-estaré
-estarás
-estará
-estaremos
-estaréis
-estarán
-estaría
-estarías
-estaríamos
-estaríais
-estarían
-estaba
-estabas
-estábamos
-estabais
-estaban
-estuve
-estuviste
-estuvo
-estuvimos
-estuvisteis
-estuvieron
-estuviera
-estuvieras
-estuviéramos
-estuvierais
-estuvieran
-estuviese
-estuvieses
-estuviésemos
-estuvieseis
-estuviesen
-estando
-estado
-estada
-estados
-estadas
-estad
-
- | forms of haber, to have (not including the infinitive):
-he
-has
-ha
-hemos
-habéis
-han
-haya
-hayas
-hayamos
-hayáis
-hayan
-habré
-habrás
-habrá
-habremos
-habréis
-habrán
-habría
-habrías
-habríamos
-habríais
-habrían
-había
-habías
-habíamos
-habíais
-habían
-hube
-hubiste
-hubo
-hubimos
-hubisteis
-hubieron
-hubiera
-hubieras
-hubiéramos
-hubierais
-hubieran
-hubiese
-hubieses
-hubiésemos
-hubieseis
-hubiesen
-habiendo
-habido
-habida
-habidos
-habidas
-
- | forms of ser, to be (not including the infinitive):
-soy
-eres
-es
-somos
-sois
-son
-sea
-seas
-seamos
-seáis
-sean
-seré
-serás
-será
-seremos
-seréis
-serán
-sería
-serías
-seríamos
-seríais
-serían
-era
-eras
-éramos
-erais
-eran
-fui
-fuiste
-fue
-fuimos
-fuisteis
-fueron
-fuera
-fueras
-fuéramos
-fuerais
-fueran
-fuese
-fueses
-fuésemos
-fueseis
-fuesen
-siendo
-sido
- | sed also means 'thirst'
-
- | forms of tener, to have (not including the infinitive):
-tengo
-tienes
-tiene
-tenemos
-tenéis
-tienen
-tenga
-tengas
-tengamos
-tengáis
-tengan
-tendré
-tendrás
-tendrá
-tendremos
-tendréis
-tendrán
-tendría
-tendrías
-tendríamos
-tendríais
-tendrían
-tenía
-tenías
-teníamos
-teníais
-tenían
-tuve
-tuviste
-tuvo
-tuvimos
-tuvisteis
-tuvieron
-tuviera
-tuvieras
-tuviéramos
-tuvierais
-tuvieran
-tuviese
-tuvieses
-tuviésemos
-tuvieseis
-tuviesen
-teniendo
-tenido
-tenida
-tenidos
-tenidas
-tened
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_eu.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_eu.txt
deleted file mode 100644
index 7e4e4bb..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_eu.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-# example set of basque stopwords
-al
-anitz
-arabera
-asko
-baina
-bat
-batean
-batek
-bati
-batzuei
-batzuek
-batzuetan
-batzuk
-bera
-beraiek
-berau
-berauek
-bere
-berori
-beroriek
-beste
-bezala
-da
-dago
-dira
-ditu
-du
-dute
-edo
-egin
-ere
-eta
-eurak
-ez
-gainera
-gu
-gutxi
-guzti
-haiei
-haiek
-haietan
-hainbeste
-hala
-han
-handik
-hango
-hara
-hari
-hark
-hartan
-hau
-hauei
-hauek
-hauetan
-hemen
-hemendik
-hemengo
-hi
-hona
-honek
-honela
-honetan
-honi
-hor
-hori
-horiei
-horiek
-horietan
-horko
-horra
-horrek
-horrela
-horretan
-horri
-hortik
-hura
-izan
-ni
-noiz
-nola
-non
-nondik
-nongo
-nor
-nora
-ze
-zein
-zen
-zenbait
-zenbat
-zer
-zergatik
-ziren
-zituen
-zu
-zuek
-zuen
-zuten
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_fa.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_fa.txt
deleted file mode 100644
index 76bb635..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_fa.txt
+++ /dev/null
@@ -1,313 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Note: by default this file is used after normalization, so when adding entries
-# to this file, use the arabic 'ي' instead of 'ی'
-انان
-نداشته
-سراسر
-خياه
-ايشان
-وي
-تاكنون
-بيشتري
-دوم
-پس
-ناشي
-وگو
-يا
-داشتند
-سپس
-هنگام
-هرگز
-پنج
-نشان
-امسال
-ديگر
-گروهي
-شدند
-چطور
-ده
-و
-دو
-نخستين
-ولي
-چرا
-چه
-وسط
-ه
-كدام
-قابل
-يك
-رفت
-هفت
-همچنين
-در
-هزار
-بله
-بلي
-شايد
-اما
-شناسي
-گرفته
-دهد
-داشته
-دانست
-داشتن
-خواهيم
-ميليارد
-وقتيكه
-امد
-خواهد
-جز
-اورده
-شده
-بلكه
-خدمات
-شدن
-برخي
-نبود
-بسياري
-جلوگيري
-حق
-كردند
-نوعي
-بعري
-نكرده
-نظير
-نبايد
-بوده
-بودن
-داد
-اورد
-هست
-جايي
-شود
-دنبال
-داده
-بايد
-سابق
-هيچ
-همان
-انجا
-كمتر
-كجاست
-گردد
-كسي
-تر
-مردم
-تان
-دادن
-بودند
-سري
-جدا
-ندارند
-مگر
-يكديگر
-دارد
-دهند
-بنابراين
-هنگامي
-سمت
-جا
-انچه
-خود
-دادند
-زياد
-دارند
-اثر
-بدون
-بهترين
-بيشتر
-البته
-به
-براساس
-بيرون
-كرد
-بعضي
-گرفت
-توي
-اي
-ميليون
-او
-جريان
-تول
-بر
-مانند
-برابر
-باشيم
-مدتي
-گويند
-اكنون
-تا
-تنها
-جديد
-چند
-بي
-نشده
-كردن
-كردم
-گويد
-كرده
-كنيم
-نمي
-نزد
-روي
-قصد
-فقط
-بالاي
-ديگران
-اين
-ديروز
-توسط
-سوم
-ايم
-دانند
-سوي
-استفاده
-شما
-كنار
-داريم
-ساخته
-طور
-امده
-رفته
-نخست
-بيست
-نزديك
-طي
-كنيد
-از
-انها
-تمامي
-داشت
-يكي
-طريق
-اش
-چيست
-روب
-نمايد
-گفت
-چندين
-چيزي
-تواند
-ام
-ايا
-با
-ان
-ايد
-ترين
-اينكه
-ديگري
-راه
-هايي
-بروز
-همچنان
-پاعين
-كس
-حدود
-مختلف
-مقابل
-چيز
-گيرد
-ندارد
-ضد
-همچون
-سازي
-شان
-مورد
-باره
-مرسي
-خويش
-برخوردار
-چون
-خارج
-شش
-هنوز
-تحت
-ضمن
-هستيم
-گفته
-فكر
-بسيار
-پيش
-براي
-روزهاي
-انكه
-نخواهد
-بالا
-كل
-وقتي
-كي
-چنين
-كه
-گيري
-نيست
-است
-كجا
-كند
-نيز
-يابد
-بندي
-حتي
-توانند
-عقب
-خواست
-كنند
-بين
-تمام
-همه
-ما
-باشند
-مثل
-شد
-اري
-باشد
-اره
-طبق
-بعد
-اگر
-صورت
-غير
-جاي
-بيش
-ريزي
-اند
-زيرا
-چگونه
-بار
-لطفا
-مي
-درباره
-من
-ديده
-همين
-گذاري
-برداري
-علت
-گذاشته
-هم
-فوق
-نه
-ها
-شوند
-اباد
-همواره
-هر
-اول
-خواهند
-چهار
-نام
-امروز
-مان
-هاي
-قبل
-كنم
-سعي
-تازه
-را
-هستند
-زير
-جلوي
-عنوان
-بود
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_fi.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_fi.txt
deleted file mode 100644
index 4527b7e..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_fi.txt
+++ /dev/null
@@ -1,95 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| forms of BE
-
-olla
-olen
-olet
-on
-olemme
-olette
-ovat
-ole | negative form
-
-oli
-olisi
-olisit
-olisin
-olisimme
-olisitte
-olisivat
-olit
-olin
-olimme
-olitte
-olivat
-ollut
-olleet
-
-en | negation
-et
-ei
-emme
-ette
-eivät
-
-|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
-minä minun minut minua minussa minusta minuun minulla minulta minulle | I
-sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
-hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
-me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
-te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
-he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
-
-tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
-tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
-se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
-nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
-nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
-ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
-
-kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
-ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
-mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
-mitkä | (pl)
-
-joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
-jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
-
-| conjunctions
-
-että | that
-ja | and
-jos | if
-koska | because
-kuin | than
-mutta | but
-niin | so
-sekä | and
-sillä | for
-tai | or
-vaan | but
-vai | or
-vaikka | although
-
-
-| prepositions
-
-kanssa | with
-mukaan | according to
-noin | about
-poikki | across
-yli | over, across
-
-| other
-
-kun | when
-niin | so
-nyt | now
-itse | self
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_fr.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_fr.txt
deleted file mode 100644
index f8d630a..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_fr.txt
+++ /dev/null
@@ -1,183 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A French stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-au | a + le
-aux | a + les
-avec | with
-ce | this
-ces | these
-dans | with
-de | of
-des | de + les
-du | de + le
-elle | she
-en | `of them' etc
-et | and
-eux | them
-il | he
-je | I
-la | the
-le | the
-leur | their
-lui | him
-ma | my (fem)
-mais | but
-me | me
-même | same; as in moi-même (myself) etc
-mes | me (pl)
-moi | me
-mon | my (masc)
-ne | not
-nos | our (pl)
-notre | our
-nous | we
-on | one
-ou | where
-par | by
-pas | not
-pour | for
-qu | que before vowel
-que | that
-qui | who
-sa | his, her (fem)
-se | oneself
-ses | his (pl)
-son | his, her (masc)
-sur | on
-ta | thy (fem)
-te | thee
-tes | thy (pl)
-toi | thee
-ton | thy (masc)
-tu | thou
-un | a
-une | a
-vos | your (pl)
-votre | your
-vous | you
-
- | single letter forms
-
-c | c'
-d | d'
-j | j'
-l | l'
-à | to, at
-m | m'
-n | n'
-s | s'
-t | t'
-y | there
-
- | forms of être (not including the infinitive):
-été
-étée
-étées
-étés
-étant
-suis
-es
-est
-sommes
-êtes
-sont
-serai
-seras
-sera
-serons
-serez
-seront
-serais
-serait
-serions
-seriez
-seraient
-étais
-était
-étions
-étiez
-étaient
-fus
-fut
-fûmes
-fûtes
-furent
-sois
-soit
-soyons
-soyez
-soient
-fusse
-fusses
-fût
-fussions
-fussiez
-fussent
-
- | forms of avoir (not including the infinitive):
-ayant
-eu
-eue
-eues
-eus
-ai
-as
-avons
-avez
-ont
-aurai
-auras
-aura
-aurons
-aurez
-auront
-aurais
-aurait
-aurions
-auriez
-auraient
-avais
-avait
-avions
-aviez
-avaient
-eut
-eûmes
-eûtes
-eurent
-aie
-aies
-ait
-ayons
-ayez
-aient
-eusse
-eusses
-eût
-eussions
-eussiez
-eussent
-
- | Later additions (from Jean-Christophe Deschamps)
-ceci | this
-celà | that
-cet | this
-cette | this
-ici | here
-ils | they
-les | the (pl)
-leurs | their (pl)
-quel | which
-quels | which
-quelle | which
-quelles | which
-sans | without
-soi | oneself
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ga.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ga.txt
deleted file mode 100644
index f03e10a..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ga.txt
+++ /dev/null
@@ -1,110 +0,0 @@
-
-a
-ach
-ag
-agus
-an
-aon
-ar
-arna
-as
-b'
-ba
-beirt
-bhúr
-caoga
-ceathair
-ceathrar
-chomh
-chtó
-chuig
-chun
-cois
-céad
-cúig
-cúigear
-d'
-daichead
-dar
-de
-deich
-deichniúr
-den
-dhá
-do
-don
-dtí
-dá
-dár
-dó
-faoi
-faoin
-faoina
-faoinár
-fara
-fiche
-gach
-gan
-go
-gur
-haon
-hocht
-i
-iad
-idir
-in
-ina
-ins
-inár
-is
-le
-leis
-lena
-lenár
-m'
-mar
-mo
-mé
-na
-nach
-naoi
-naonúr
-ná
-ní
-níor
-nó
-nócha
-ocht
-ochtar
-os
-roimh
-sa
-seacht
-seachtar
-seachtó
-seasca
-seisear
-siad
-sibh
-sinn
-sna
-sé
-sí
-tar
-thar
-thú
-triúr
-trí
-trína
-trínár
-tríocha
-tú
-um
-ár
-é
-éis
-í
-ó
-ón
-óna
-ónár
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_gl.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_gl.txt
deleted file mode 100644
index 57803b3..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_gl.txt
+++ /dev/null
@@ -1,161 +0,0 @@
-# galican stopwords
-a
-aínda
-alí
-aquel
-aquela
-aquelas
-aqueles
-aquilo
-aquí
-ao
-aos
-as
-así
-á
-ben
-cando
-che
-co
-coa
-comigo
-con
-connosco
-contigo
-convosco
-coas
-cos
-cun
-cuns
-cunha
-cunhas
-da
-dalgunha
-dalgunhas
-dalgún
-dalgúns
-das
-de
-del
-dela
-delas
-deles
-desde
-deste
-do
-dos
-dun
-duns
-dunha
-dunhas
-e
-el
-ela
-elas
-eles
-en
-era
-eran
-esa
-esas
-ese
-eses
-esta
-estar
-estaba
-está
-están
-este
-estes
-estiven
-estou
-eu
-é
-facer
-foi
-foron
-fun
-había
-hai
-iso
-isto
-la
-las
-lle
-lles
-lo
-los
-mais
-me
-meu
-meus
-min
-miña
-miñas
-moi
-na
-nas
-neste
-nin
-no
-non
-nos
-nosa
-nosas
-noso
-nosos
-nós
-nun
-nunha
-nuns
-nunhas
-o
-os
-ou
-ó
-ós
-para
-pero
-pode
-pois
-pola
-polas
-polo
-polos
-por
-que
-se
-senón
-ser
-seu
-seus
-sexa
-sido
-sobre
-súa
-súas
-tamén
-tan
-te
-ten
-teñen
-teño
-ter
-teu
-teus
-ti
-tido
-tiña
-tiven
-túa
-túas
-un
-unha
-unhas
-uns
-vos
-vosa
-vosas
-voso
-vosos
-vós
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_hi.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_hi.txt
deleted file mode 100644
index 1f19fd1..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_hi.txt
+++ /dev/null
@@ -1,235 +0,0 @@
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# Note: by default this file also contains forms normalized by HindiNormalizer
-# for spelling variation (see section below), such that it can be used whether or
-# not you enable that feature. When adding additional entries to this list,
-# please add the normalized form as well.
-अंदर
-अत
-अपना
-अपनी
-अपने
-अभी
-आदि
-आप
-इत्यादि
-इन
-इनका
-इन्हीं
-इन्हें
-इन्हों
-इस
-इसका
-इसकी
-इसके
-इसमें
-इसी
-इसे
-उन
-उनका
-उनकी
-उनके
-उनको
-उन्हीं
-उन्हें
-उन्हों
-उस
-उसके
-उसी
-उसे
-एक
-एवं
-एस
-ऐसे
-और
-कई
-कर
-करता
-करते
-करना
-करने
-करें
-कहते
-कहा
-का
-काफ़ी
-कि
-कितना
-किन्हें
-किन्हों
-किया
-किर
-किस
-किसी
-किसे
-की
-कुछ
-कुल
-के
-को
-कोई
-कौन
-कौनसा
-गया
-घर
-जब
-जहाँ
-जा
-जितना
-जिन
-जिन्हें
-जिन्हों
-जिस
-जिसे
-जीधर
-जैसा
-जैसे
-जो
-तक
-तब
-तरह
-तिन
-तिन्हें
-तिन्हों
-तिस
-तिसे
-तो
-था
-थी
-थे
-दबारा
-दिया
-दुसरा
-दूसरे
-दो
-द्वारा
-न
-नहीं
-ना
-निहायत
-नीचे
-ने
-पर
-पर
-पहले
-पूरा
-पे
-फिर
-बनी
-बही
-बहुत
-बाद
-बाला
-बिलकुल
-भी
-भीतर
-मगर
-मानो
-मे
-में
-यदि
-यह
-यहाँ
-यही
-या
-यिह
-ये
-रखें
-रहा
-रहे
-ऱ्वासा
-लिए
-लिये
-लेकिन
-व
-वर्ग
-वह
-वह
-वहाँ
-वहीं
-वाले
-वुह
-वे
-वग़ैरह
-संग
-सकता
-सकते
-सबसे
-सभी
-साथ
-साबुत
-साभ
-सारा
-से
-सो
-ही
-हुआ
-हुई
-हुए
-है
-हैं
-हो
-होता
-होती
-होते
-होना
-होने
-# additional normalized forms of the above
-अपनि
-जेसे
-होति
-सभि
-तिंहों
-इंहों
-दवारा
-इसि
-किंहें
-थि
-उंहों
-ओर
-जिंहें
-वहिं
-अभि
-बनि
-हि
-उंहिं
-उंहें
-हें
-वगेरह
-एसे
-रवासा
-कोन
-निचे
-काफि
-उसि
-पुरा
-भितर
-हे
-बहि
-वहां
-कोइ
-यहां
-जिंहों
-तिंहें
-किसि
-कइ
-यहि
-इंहिं
-जिधर
-इंहें
-अदि
-इतयादि
-हुइ
-कोनसा
-इसकि
-दुसरे
-जहां
-अप
-किंहों
-उनकि
-भि
-वरग
-हुअ
-जेसा
-नहिं
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_hu.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_hu.txt
deleted file mode 100644
index 1c6d333..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_hu.txt
+++ /dev/null
@@ -1,209 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| Hungarian stop word list
-| prepared by Anna Tordai
-
-a
-ahogy
-ahol
-aki
-akik
-akkor
-alatt
-által
-általában
-amely
-amelyek
-amelyekben
-amelyeket
-amelyet
-amelynek
-ami
-amit
-amolyan
-amíg
-amikor
-át
-abban
-ahhoz
-annak
-arra
-arról
-az
-azok
-azon
-azt
-azzal
-azért
-aztán
-azután
-azonban
-bár
-be
-belül
-benne
-cikk
-cikkek
-cikkeket
-csak
-de
-e
-eddig
-egész
-egy
-egyes
-egyetlen
-egyéb
-egyik
-egyre
-ekkor
-el
-elég
-ellen
-elő
-először
-előtt
-első
-én
-éppen
-ebben
-ehhez
-emilyen
-ennek
-erre
-ez
-ezt
-ezek
-ezen
-ezzel
-ezért
-és
-fel
-felé
-hanem
-hiszen
-hogy
-hogyan
-igen
-így
-illetve
-ill.
-ill
-ilyen
-ilyenkor
-ison
-ismét
-itt
-jó
-jól
-jobban
-kell
-kellett
-keresztül
-keressünk
-ki
-kívül
-között
-közül
-legalább
-lehet
-lehetett
-legyen
-lenne
-lenni
-lesz
-lett
-maga
-magát
-majd
-majd
-már
-más
-másik
-meg
-még
-mellett
-mert
-mely
-melyek
-mi
-mit
-míg
-miért
-milyen
-mikor
-minden
-mindent
-mindenki
-mindig
-mint
-mintha
-mivel
-most
-nagy
-nagyobb
-nagyon
-ne
-néha
-nekem
-neki
-nem
-néhány
-nélkül
-nincs
-olyan
-ott
-össze
-ő
-ők
-őket
-pedig
-persze
-rá
-s
-saját
-sem
-semmi
-sok
-sokat
-sokkal
-számára
-szemben
-szerint
-szinte
-talán
-tehát
-teljes
-tovább
-továbbá
-több
-úgy
-ugyanis
-új
-újabb
-újra
-után
-utána
-utolsó
-vagy
-vagyis
-valaki
-valami
-valamint
-való
-vagyok
-van
-vannak
-volt
-voltam
-voltak
-voltunk
-vissza
-vele
-viszont
-volna
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_hy.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_hy.txt
deleted file mode 100644
index 968a8f4..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_hy.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# example set of Armenian stopwords.
-այդ
-այլ
-այն
-այս
-դու
-դուք
-եմ
-են
-ենք
-ես
-եք
-է
-էի
-էին
-էինք
-էիր
-էիք
-էր
-ըստ
-թ
-ի
-ին
-իսկ
-իր
-կամ
-համար
-հետ
-հետո
-մենք
-մեջ
-մի
-ն
-նա
-նաև
-նրա
-նրանք
-որ
-որը
-որոնք
-որպես
-ու
-ում
-պիտի
-վրա
-և
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_id.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_id.txt
deleted file mode 100644
index 80df39a..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_id.txt
+++ /dev/null
@@ -1,359 +0,0 @@
-# from appendix D of: A Study of Stemming Effects on Information
-# Retrieval in Bahasa Indonesia
-ada
-adanya
-adalah
-adapun
-agak
-agaknya
-agar
-akan
-akankah
-akhirnya
-aku
-akulah
-amat
-amatlah
-anda
-andalah
-antar
-diantaranya
-antara
-antaranya
-diantara
-apa
-apaan
-mengapa
-apabila
-apakah
-apalagi
-apatah
-atau
-ataukah
-ataupun
-bagai
-bagaikan
-sebagai
-sebagainya
-bagaimana
-bagaimanapun
-sebagaimana
-bagaimanakah
-bagi
-bahkan
-bahwa
-bahwasanya
-sebaliknya
-banyak
-sebanyak
-beberapa
-seberapa
-begini
-beginian
-beginikah
-beginilah
-sebegini
-begitu
-begitukah
-begitulah
-begitupun
-sebegitu
-belum
-belumlah
-sebelum
-sebelumnya
-sebenarnya
-berapa
-berapakah
-berapalah
-berapapun
-betulkah
-sebetulnya
-biasa
-biasanya
-bila
-bilakah
-bisa
-bisakah
-sebisanya
-boleh
-bolehkah
-bolehlah
-buat
-bukan
-bukankah
-bukanlah
-bukannya
-cuma
-percuma
-dahulu
-dalam
-dan
-dapat
-dari
-daripada
-dekat
-demi
-demikian
-demikianlah
-sedemikian
-dengan
-depan
-di
-dia
-dialah
-dini
-diri
-dirinya
-terdiri
-dong
-dulu
-enggak
-enggaknya
-entah
-entahlah
-terhadap
-terhadapnya
-hal
-hampir
-hanya
-hanyalah
-harus
-haruslah
-harusnya
-seharusnya
-hendak
-hendaklah
-hendaknya
-hingga
-sehingga
-ia
-ialah
-ibarat
-ingin
-inginkah
-inginkan
-ini
-inikah
-inilah
-itu
-itukah
-itulah
-jangan
-jangankan
-janganlah
-jika
-jikalau
-juga
-justru
-kala
-kalau
-kalaulah
-kalaupun
-kalian
-kami
-kamilah
-kamu
-kamulah
-kan
-kapan
-kapankah
-kapanpun
-dikarenakan
-karena
-karenanya
-ke
-kecil
-kemudian
-kenapa
-kepada
-kepadanya
-ketika
-seketika
-khususnya
-kini
-kinilah
-kiranya
-sekiranya
-kita
-kitalah
-kok
-lagi
-lagian
-selagi
-lah
-lain
-lainnya
-melainkan
-selaku
-lalu
-melalui
-terlalu
-lama
-lamanya
-selama
-selama
-selamanya
-lebih
-terlebih
-bermacam
-macam
-semacam
-maka
-makanya
-makin
-malah
-malahan
-mampu
-mampukah
-mana
-manakala
-manalagi
-masih
-masihkah
-semasih
-masing
-mau
-maupun
-semaunya
-memang
-mereka
-merekalah
-meski
-meskipun
-semula
-mungkin
-mungkinkah
-nah
-namun
-nanti
-nantinya
-nyaris
-oleh
-olehnya
-seorang
-seseorang
-pada
-padanya
-padahal
-paling
-sepanjang
-pantas
-sepantasnya
-sepantasnyalah
-para
-pasti
-pastilah
-per
-pernah
-pula
-pun
-merupakan
-rupanya
-serupa
-saat
-saatnya
-sesaat
-saja
-sajalah
-saling
-bersama
-sama
-sesama
-sambil
-sampai
-sana
-sangat
-sangatlah
-saya
-sayalah
-se
-sebab
-sebabnya
-sebuah
-tersebut
-tersebutlah
-sedang
-sedangkan
-sedikit
-sedikitnya
-segala
-segalanya
-segera
-sesegera
-sejak
-sejenak
-sekali
-sekalian
-sekalipun
-sesekali
-sekaligus
-sekarang
-sekarang
-sekitar
-sekitarnya
-sela
-selain
-selalu
-seluruh
-seluruhnya
-semakin
-sementara
-sempat
-semua
-semuanya
-sendiri
-sendirinya
-seolah
-seperti
-sepertinya
-sering
-seringnya
-serta
-siapa
-siapakah
-siapapun
-disini
-disinilah
-sini
-sinilah
-sesuatu
-sesuatunya
-suatu
-sesudah
-sesudahnya
-sudah
-sudahkah
-sudahlah
-supaya
-tadi
-tadinya
-tak
-tanpa
-setelah
-telah
-tentang
-tentu
-tentulah
-tentunya
-tertentu
-seterusnya
-tapi
-tetapi
-setiap
-tiap
-setidaknya
-tidak
-tidakkah
-tidaklah
-toh
-waduh
-wah
-wahai
-sewaktu
-walau
-walaupun
-wong
-yaitu
-yakni
-yang
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_it.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_it.txt
deleted file mode 100644
index 7c64aef..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_it.txt
+++ /dev/null
@@ -1,301 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | An Italian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-ad | a (to) before vowel
-al | a + il
-allo | a + lo
-ai | a + i
-agli | a + gli
-all | a + l'
-agl | a + gl'
-alla | a + la
-alle | a + le
-con | with
-col | con + il
-coi | con + i (forms collo, cogli etc are now very rare)
-da | from
-dal | da + il
-dallo | da + lo
-dai | da + i
-dagli | da + gli
-dall | da + l'
-dagl | da + gll'
-dalla | da + la
-dalle | da + le
-di | of
-del | di + il
-dello | di + lo
-dei | di + i
-degli | di + gli
-dell | di + l'
-degl | di + gl'
-della | di + la
-delle | di + le
-in | in
-nel | in + el
-nello | in + lo
-nei | in + i
-negli | in + gli
-nell | in + l'
-negl | in + gl'
-nella | in + la
-nelle | in + le
-su | on
-sul | su + il
-sullo | su + lo
-sui | su + i
-sugli | su + gli
-sull | su + l'
-sugl | su + gl'
-sulla | su + la
-sulle | su + le
-per | through, by
-tra | among
-contro | against
-io | I
-tu | thou
-lui | he
-lei | she
-noi | we
-voi | you
-loro | they
-mio | my
-mia |
-miei |
-mie |
-tuo |
-tua |
-tuoi | thy
-tue |
-suo |
-sua |
-suoi | his, her
-sue |
-nostro | our
-nostra |
-nostri |
-nostre |
-vostro | your
-vostra |
-vostri |
-vostre |
-mi | me
-ti | thee
-ci | us, there
-vi | you, there
-lo | him, the
-la | her, the
-li | them
-le | them, the
-gli | to him, the
-ne | from there etc
-il | the
-un | a
-uno | a
-una | a
-ma | but
-ed | and
-se | if
-perché | why, because
-anche | also
-come | how
-dov | where (as dov')
-dove | where
-che | who, that
-chi | who
-cui | whom
-non | not
-più | more
-quale | who, that
-quanto | how much
-quanti |
-quanta |
-quante |
-quello | that
-quelli |
-quella |
-quelle |
-questo | this
-questi |
-questa |
-queste |
-si | yes
-tutto | all
-tutti | all
-
- | single letter forms:
-
-a | at
-c | as c' for ce or ci
-e | and
-i | the
-l | as l'
-o | or
-
- | forms of avere, to have (not including the infinitive):
-
-ho
-hai
-ha
-abbiamo
-avete
-hanno
-abbia
-abbiate
-abbiano
-avrò
-avrai
-avrà
-avremo
-avrete
-avranno
-avrei
-avresti
-avrebbe
-avremmo
-avreste
-avrebbero
-avevo
-avevi
-aveva
-avevamo
-avevate
-avevano
-ebbi
-avesti
-ebbe
-avemmo
-aveste
-ebbero
-avessi
-avesse
-avessimo
-avessero
-avendo
-avuto
-avuta
-avuti
-avute
-
- | forms of essere, to be (not including the infinitive):
-sono
-sei
-è
-siamo
-siete
-sia
-siate
-siano
-sarò
-sarai
-sarà
-saremo
-sarete
-saranno
-sarei
-saresti
-sarebbe
-saremmo
-sareste
-sarebbero
-ero
-eri
-era
-eravamo
-eravate
-erano
-fui
-fosti
-fu
-fummo
-foste
-furono
-fossi
-fosse
-fossimo
-fossero
-essendo
-
- | forms of fare, to do (not including the infinitive, fa, fat-):
-faccio
-fai
-facciamo
-fanno
-faccia
-facciate
-facciano
-farò
-farai
-farà
-faremo
-farete
-faranno
-farei
-faresti
-farebbe
-faremmo
-fareste
-farebbero
-facevo
-facevi
-faceva
-facevamo
-facevate
-facevano
-feci
-facesti
-fece
-facemmo
-faceste
-fecero
-facessi
-facesse
-facessimo
-facessero
-facendo
-
- | forms of stare, to be (not including the infinitive):
-sto
-stai
-sta
-stiamo
-stanno
-stia
-stiate
-stiano
-starò
-starai
-starà
-staremo
-starete
-staranno
-starei
-staresti
-starebbe
-staremmo
-stareste
-starebbero
-stavo
-stavi
-stava
-stavamo
-stavate
-stavano
-stetti
-stesti
-stette
-stemmo
-steste
-stettero
-stessi
-stesse
-stessimo
-stessero
-stando
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ja.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ja.txt
deleted file mode 100644
index bc3077e..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ja.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-#
-# This file defines a stopword set for Japanese.
-#
-# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
-# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
-# for frequency lists, etc. that can be useful for making your own set (if desired)
-#
-# Note that there is an overlap between these stopwords and the terms stopped when used
-# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
-# that comments are not allowed on the same line as stopwords.
-#
-# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
-# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
-# using the same character width as the entries in this file. Since this StopFilter is
-# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
-# entries to be in half-width and your kana entries to be in full-width.
-#
-の
-に
-は
-を
-た
-が
-で
-て
-と
-し
-れ
-さ
-ある
-いる
-も
-する
-から
-な
-こと
-として
-い
-や
-れる
-など
-なっ
-ない
-この
-ため
-その
-あっ
-よう
-また
-もの
-という
-あり
-まで
-られ
-なる
-へ
-か
-だ
-これ
-によって
-により
-おり
-より
-による
-ず
-なり
-られる
-において
-ば
-なかっ
-なく
-しかし
-について
-せ
-だっ
-その後
-できる
-それ
-う
-ので
-なお
-のみ
-でき
-き
-つ
-における
-および
-いう
-さらに
-でも
-ら
-たり
-その他
-に関する
-たち
-ます
-ん
-なら
-に対して
-特に
-せる
-及び
-これら
-とき
-では
-にて
-ほか
-ながら
-うち
-そして
-とともに
-ただし
-かつて
-それぞれ
-または
-お
-ほど
-ものの
-に対する
-ほとんど
-と共に
-といった
-です
-とも
-ところ
-ここ
-##### End of file
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_lv.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_lv.txt
deleted file mode 100644
index a471097..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_lv.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
-# the original list of over 800 forms was refined:
-# pronouns, adverbs, interjections were removed
-#
-# prepositions
-aiz
-ap
-ar
-apakš
-ārpus
-augšpus
-bez
-caur
-dēļ
-gar
-iekš
-iz
-kopš
-labad
-lejpus
-līdz
-no
-otrpus
-pa
-par
-pār
-pēc
-pie
-pirms
-pret
-priekš
-starp
-šaipus
-uz
-viņpus
-virs
-virspus
-zem
-apakšpus
-# Conjunctions
-un
-bet
-jo
-ja
-ka
-lai
-tomēr
-tikko
-turpretī
-arī
-kaut
-gan
-tādēļ
-tā
-ne
-tikvien
-vien
-kā
-ir
-te
-vai
-kamēr
-# Particles
-ar
-diezin
-droši
-diemžēl
-nebūt
-ik
-it
-taču
-nu
-pat
-tiklab
-iekšpus
-nedz
-tik
-nevis
-turpretim
-jeb
-iekam
-iekām
-iekāms
-kolīdz
-līdzko
-tiklīdz
-jebšu
-tālab
-tāpēc
-nekā
-itin
-jā
-jau
-jel
-nē
-nezin
-tad
-tikai
-vis
-tak
-iekams
-vien
-# modal verbs
-būt
-biju
-biji
-bija
-bijām
-bijāt
-esmu
-esi
-esam
-esat
-būšu
-būsi
-būs
-būsim
-būsiet
-tikt
-tiku
-tiki
-tika
-tikām
-tikāt
-tieku
-tiec
-tiek
-tiekam
-tiekat
-tikšu
-tiks
-tiksim
-tiksiet
-tapt
-tapi
-tapāt
-topat
-tapšu
-tapsi
-taps
-tapsim
-tapsiet
-kļūt
-kļuvu
-kļuvi
-kļuva
-kļuvām
-kļuvāt
-kļūstu
-kļūsti
-kļūst
-kļūstam
-kļūstat
-kļūšu
-kļūsi
-kļūs
-kļūsim
-kļūsiet
-# verbs
-varēt
-varēju
-varējām
-varēšu
-varēsim
-var
-varēji
-varējāt
-varēsi
-varēsiet
-varat
-varēja
-varēs
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_nl.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_nl.txt
deleted file mode 100644
index 2a16430..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_nl.txt
+++ /dev/null
@@ -1,117 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Dutch stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large sample of Dutch text.
-
- | Dutch stop words frequently exhibit homonym clashes. These are indicated
- | clearly below.
-
-de | the
-en | and
-van | of, from
-ik | I, the ego
-te | (1) chez, at etc, (2) to, (3) too
-dat | that, which
-die | that, those, who, which
-in | in, inside
-een | a, an, one
-hij | he
-het | the, it
-niet | not, nothing, naught
-zijn | (1) to be, being, (2) his, one's, its
-is | is
-was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
-op | on, upon, at, in, up, used up
-aan | on, upon, to (as dative)
-met | with, by
-als | like, such as, when
-voor | (1) before, in front of, (2) furrow
-had | had, past tense all persons sing. of 'hebben' (have)
-er | there
-maar | but, only
-om | round, about, for etc
-hem | him
-dan | then
-zou | should/would, past tense all persons sing. of 'zullen'
-of | or, whether, if
-wat | what, something, anything
-mijn | possessive and noun 'mine'
-men | people, 'one'
-dit | this
-zo | so, thus, in this way
-door | through by
-over | over, across
-ze | she, her, they, them
-zich | oneself
-bij | (1) a bee, (2) by, near, at
-ook | also, too
-tot | till, until
-je | you
-mij | me
-uit | out of, from
-der | Old Dutch form of 'van der' still found in surnames
-daar | (1) there, (2) because
-haar | (1) her, their, them, (2) hair
-naar | (1) unpleasant, unwell etc, (2) towards, (3) as
-heb | present first person sing. of 'to have'
-hoe | how, why
-heeft | present third person sing. of 'to have'
-hebben | 'to have' and various parts thereof
-deze | this
-u | you
-want | (1) for, (2) mitten, (3) rigging
-nog | yet, still
-zal | 'shall', first and third person sing. of verb 'zullen' (will)
-me | me
-zij | she, they
-nu | now
-ge | 'thou', still used in Belgium and south Netherlands
-geen | none
-omdat | because
-iets | something, somewhat
-worden | to become, grow, get
-toch | yet, still
-al | all, every, each
-waren | (1) 'were' (2) to wander, (3) wares, (3)
-veel | much, many
-meer | (1) more, (2) lake
-doen | to do, to make
-toen | then, when
-moet | noun 'spot/mote' and present form of 'to must'
-ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
-zonder | without
-kan | noun 'can' and present form of 'to be able'
-hun | their, them
-dus | so, consequently
-alles | all, everything, anything
-onder | under, beneath
-ja | yes, of course
-eens | once, one day
-hier | here
-wie | who
-werd | imperfect third person sing. of 'become'
-altijd | always
-doch | yet, but etc
-wordt | present third person sing. of 'become'
-wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
-kunnen | to be able
-ons | us/our
-zelf | self
-tegen | against, towards, at
-na | after, near
-reeds | already
-wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
-kon | could; past tense of 'to be able'
-niets | nothing
-uw | your
-iemand | somebody
-geweest | been; past participle of 'be'
-andere | other
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_no.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_no.txt
deleted file mode 100644
index 0b037b5..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_no.txt
+++ /dev/null
@@ -1,192 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Norwegian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This stop word list is for the dominant bokmål dialect. Words unique
- | to nynorsk are marked *.
-
- | Revised by Jan Bruusgaard , Jan 2005
-
-og | and
-i | in
-jeg | I
-det | it/this/that
-at | to (w. inf.)
-en | a/an
-et | a/an
-den | it/this/that
-til | to
-er | is/am/are
-som | who/that
-på | on
-de | they / you(formal)
-med | with
-han | he
-av | of
-ikke | not
-ikkje | not *
-der | there
-så | so
-var | was/were
-meg | me
-seg | you
-men | but
-ett | one
-har | have
-om | about
-vi | we
-min | my
-mitt | my
-ha | have
-hadde | had
-hun | she
-nå | now
-over | over
-da | when/as
-ved | by/know
-fra | from
-du | you
-ut | out
-sin | your
-dem | them
-oss | us
-opp | up
-man | you/one
-kan | can
-hans | his
-hvor | where
-eller | or
-hva | what
-skal | shall/must
-selv | self (reflective)
-sjøl | self (reflective)
-her | here
-alle | all
-vil | will
-bli | become
-ble | became
-blei | became *
-blitt | have become
-kunne | could
-inn | in
-når | when
-være | be
-kom | come
-noen | some
-noe | some
-ville | would
-dere | you
-som | who/which/that
-deres | their/theirs
-kun | only/just
-ja | yes
-etter | after
-ned | down
-skulle | should
-denne | this
-for | for/because
-deg | you
-si | hers/his
-sine | hers/his
-sitt | hers/his
-mot | against
-å | to
-meget | much
-hvorfor | why
-dette | this
-disse | these/those
-uten | without
-hvordan | how
-ingen | none
-din | your
-ditt | your
-blir | become
-samme | same
-hvilken | which
-hvilke | which (plural)
-sånn | such a
-inni | inside/within
-mellom | between
-vår | our
-hver | each
-hvem | who
-vors | us/ours
-hvis | whose
-både | both
-bare | only/just
-enn | than
-fordi | as/because
-før | before
-mange | many
-også | also
-slik | just
-vært | been
-være | to be
-båe | both *
-begge | both
-siden | since
-dykk | your *
-dykkar | yours *
-dei | they *
-deira | them *
-deires | theirs *
-deim | them *
-di | your (fem.) *
-då | as/when *
-eg | I *
-ein | a/an *
-eit | a/an *
-eitt | a/an *
-elles | or *
-honom | he *
-hjå | at *
-ho | she *
-hoe | she *
-henne | her
-hennar | her/hers
-hennes | hers
-hoss | how *
-hossen | how *
-ikkje | not *
-ingi | noone *
-inkje | noone *
-korleis | how *
-korso | how *
-kva | what/which *
-kvar | where *
-kvarhelst | where *
-kven | who/whom *
-kvi | why *
-kvifor | why *
-me | we *
-medan | while *
-mi | my *
-mine | my *
-mykje | much *
-no | now *
-nokon | some (masc./neut.) *
-noka | some (fem.) *
-nokor | some *
-noko | some *
-nokre | some *
-si | his/hers *
-sia | since *
-sidan | since *
-so | so *
-somt | some *
-somme | some *
-um | about*
-upp | up *
-vere | be *
-vore | was *
-verte | become *
-vort | become *
-varte | became *
-vart | became *
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_pt.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_pt.txt
deleted file mode 100644
index e156c88..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_pt.txt
+++ /dev/null
@@ -1,251 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Portuguese stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | of, from
-a | the; to, at; her
-o | the; him
-que | who, that
-e | and
-do | de + o
-da | de + a
-em | in
-um | a
-para | for
- | é from SER
-com | with
-não | not, no
-uma | a
-os | the; them
-no | em + o
-se | himself etc
-na | em + a
-por | for
-mais | more
-as | the; them
-dos | de + os
-como | as, like
-mas | but
- | foi from SER
-ao | a + o
-ele | he
-das | de + as
- | tem from TER
-à | a + a
-seu | his
-sua | her
-ou | or
- | ser from SER
-quando | when
-muito | much
- | há from HAV
-nos | em + os; us
-já | already, now
- | está from EST
-eu | I
-também | also
-só | only, just
-pelo | per + o
-pela | per + a
-até | up to
-isso | that
-ela | he
-entre | between
- | era from SER
-depois | after
-sem | without
-mesmo | same
-aos | a + os
- | ter from TER
-seus | his
-quem | whom
-nas | em + as
-me | me
-esse | that
-eles | they
- | estão from EST
-você | you
- | tinha from TER
- | foram from SER
-essa | that
-num | em + um
-nem | nor
-suas | her
-meu | my
-às | a + as
-minha | my
- | têm from TER
-numa | em + uma
-pelos | per + os
-elas | they
- | havia from HAV
- | seja from SER
-qual | which
- | será from SER
-nós | we
- | tenho from TER
-lhe | to him, her
-deles | of them
-essas | those
-esses | those
-pelas | per + as
-este | this
- | fosse from SER
-dele | of him
-
- | other words. There are many contractions such as naquele = em+aquele,
- | mo = me+o, but they are rare.
- | Indefinite article plural forms are also rare.
-
-tu | thou
-te | thee
-vocês | you (plural)
-vos | you
-lhes | to them
-meus | my
-minhas
-teu | thy
-tua
-teus
-tuas
-nosso | our
-nossa
-nossos
-nossas
-
-dela | of her
-delas | of them
-
-esta | this
-estes | these
-estas | these
-aquele | that
-aquela | that
-aqueles | those
-aquelas | those
-isto | this
-aquilo | that
-
- | forms of estar, to be (not including the infinitive):
-estou
-está
-estamos
-estão
-estive
-esteve
-estivemos
-estiveram
-estava
-estávamos
-estavam
-estivera
-estivéramos
-esteja
-estejamos
-estejam
-estivesse
-estivéssemos
-estivessem
-estiver
-estivermos
-estiverem
-
- | forms of haver, to have (not including the infinitive):
-hei
-há
-havemos
-hão
-houve
-houvemos
-houveram
-houvera
-houvéramos
-haja
-hajamos
-hajam
-houvesse
-houvéssemos
-houvessem
-houver
-houvermos
-houverem
-houverei
-houverá
-houveremos
-houverão
-houveria
-houveríamos
-houveriam
-
- | forms of ser, to be (not including the infinitive):
-sou
-somos
-são
-era
-éramos
-eram
-fui
-foi
-fomos
-foram
-fora
-fôramos
-seja
-sejamos
-sejam
-fosse
-fôssemos
-fossem
-for
-formos
-forem
-serei
-será
-seremos
-serão
-seria
-seríamos
-seriam
-
- | forms of ter, to have (not including the infinitive):
-tenho
-tem
-temos
-tém
-tinha
-tínhamos
-tinham
-tive
-teve
-tivemos
-tiveram
-tivera
-tivéramos
-tenha
-tenhamos
-tenham
-tivesse
-tivéssemos
-tivessem
-tiver
-tivermos
-tiverem
-terei
-terá
-teremos
-terão
-teria
-teríamos
-teriam
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ro.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ro.txt
deleted file mode 100644
index 2d7395c..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ro.txt
+++ /dev/null
@@ -1,233 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-acea
-aceasta
-această
-aceea
-acei
-aceia
-acel
-acela
-acele
-acelea
-acest
-acesta
-aceste
-acestea
-aceşti
-aceştia
-acolo
-acum
-ai
-aia
-aibă
-aici
-al
-ăla
-ale
-alea
-ălea
-altceva
-altcineva
-am
-ar
-are
-aş
-aşadar
-asemenea
-asta
-ăsta
-astăzi
-astea
-ăstea
-ăştia
-asupra
-aţi
-au
-avea
-avem
-aveţi
-azi
-bine
-bucur
-bună
-ca
-că
-căci
-când
-care
-cărei
-căror
-cărui
-cât
-câte
-câţi
-către
-câtva
-ce
-cel
-ceva
-chiar
-cînd
-cine
-cineva
-cît
-cîte
-cîţi
-cîtva
-contra
-cu
-cum
-cumva
-curând
-curînd
-da
-dă
-dacă
-dar
-datorită
-de
-deci
-deja
-deoarece
-departe
-deşi
-din
-dinaintea
-dintr
-dintre
-drept
-după
-ea
-ei
-el
-ele
-eram
-este
-eşti
-eu
-face
-fără
-fi
-fie
-fiecare
-fii
-fim
-fiţi
-iar
-ieri
-îi
-îl
-îmi
-împotriva
-în
-înainte
-înaintea
-încât
-încît
-încotro
-între
-întrucât
-întrucît
-îţi
-la
-lângă
-le
-li
-lîngă
-lor
-lui
-mă
-mâine
-mea
-mei
-mele
-mereu
-meu
-mi
-mine
-mult
-multă
-mulţi
-ne
-nicăieri
-nici
-nimeni
-nişte
-noastră
-noastre
-noi
-noştri
-nostru
-nu
-ori
-oricând
-oricare
-oricât
-orice
-oricînd
-oricine
-oricît
-oricum
-oriunde
-până
-pe
-pentru
-peste
-pînă
-poate
-pot
-prea
-prima
-primul
-prin
-printr
-sa
-să
-săi
-sale
-sau
-său
-se
-şi
-sînt
-sîntem
-sînteţi
-spre
-sub
-sunt
-suntem
-sunteţi
-ta
-tăi
-tale
-tău
-te
-ţi
-ţie
-tine
-toată
-toate
-tot
-toţi
-totuşi
-tu
-un
-una
-unde
-undeva
-unei
-unele
-uneori
-unor
-vă
-vi
-voastră
-voastre
-voi
-voştri
-vostru
-vouă
-vreo
-vreun
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ru.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ru.txt
deleted file mode 100644
index 514b9c2..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_ru.txt
+++ /dev/null
@@ -1,241 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | a russian stop word list. comments begin with vertical bar. each stop
- | word is at the start of a line.
-
- | this is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | letter `ё' is translated to `е'.
-
-и | and
-в | in/into
-во | alternative form
-не | not
-что | what/that
-он | he
-на | on/onto
-я | i
-с | from
-со | alternative form
-как | how
-а | milder form of `no' (but)
-то | conjunction and form of `that'
-все | all
-она | she
-так | so, thus
-его | him
-но | but
-да | yes/and
-ты | thou
-к | towards, by
-у | around, chez
-же | intensifier particle
-вы | you
-за | beyond, behind
-бы | conditional/subj. particle
-по | up to, along
-только | only
-ее | her
-мне | to me
-было | it was
-вот | here is/are, particle
-от | away from
-меня | me
-еще | still, yet, more
-нет | no, there isnt/arent
-о | about
-из | out of
-ему | to him
-теперь | now
-когда | when
-даже | even
-ну | so, well
-вдруг | suddenly
-ли | interrogative particle
-если | if
-уже | already, but homonym of `narrower'
-или | or
-ни | neither
-быть | to be
-был | he was
-него | prepositional form of его
-до | up to
-вас | you accusative
-нибудь | indef. suffix preceded by hyphen
-опять | again
-уж | already, but homonym of `adder'
-вам | to you
-сказал | he said
-ведь | particle `after all'
-там | there
-потом | then
-себя | oneself
-ничего | nothing
-ей | to her
-может | usually with `быть' as `maybe'
-они | they
-тут | here
-где | where
-есть | there is/are
-надо | got to, must
-ней | prepositional form of ей
-для | for
-мы | we
-тебя | thee
-их | them, their
-чем | than
-была | she was
-сам | self
-чтоб | in order to
-без | without
-будто | as if
-человек | man, person, one
-чего | genitive form of `what'
-раз | once
-тоже | also
-себе | to oneself
-под | beneath
-жизнь | life
-будет | will be
-ж | short form of intensifer particle `же'
-тогда | then
-кто | who
-этот | this
-говорил | was saying
-того | genitive form of `that'
-потому | for that reason
-этого | genitive form of `this'
-какой | which
-совсем | altogether
-ним | prepositional form of `его', `они'
-здесь | here
-этом | prepositional form of `этот'
-один | one
-почти | almost
-мой | my
-тем | instrumental/dative plural of `тот', `то'
-чтобы | full form of `in order that'
-нее | her (acc.)
-кажется | it seems
-сейчас | now
-были | they were
-куда | where to
-зачем | why
-сказать | to say
-всех | all (acc., gen. preposn. plural)
-никогда | never
-сегодня | today
-можно | possible, one can
-при | by
-наконец | finally
-два | two
-об | alternative form of `о', about
-другой | another
-хоть | even
-после | after
-над | above
-больше | more
-тот | that one (masc.)
-через | across, in
-эти | these
-нас | us
-про | about
-всего | in all, only, of all
-них | prepositional form of `они' (they)
-какая | which, feminine
-много | lots
-разве | interrogative particle
-сказала | she said
-три | three
-эту | this, acc. fem. sing.
-моя | my, feminine
-впрочем | moreover, besides
-хорошо | good
-свою | ones own, acc. fem. sing.
-этой | oblique form of `эта', fem. `this'
-перед | in front of
-иногда | sometimes
-лучше | better
-чуть | a little
-том | preposn. form of `that one'
-нельзя | one must not
-такой | such a one
-им | to them
-более | more
-всегда | always
-конечно | of course
-всю | acc. fem. sing of `all'
-между | between
-
-
- | b: some paradigms
- |
- | personal pronouns
- |
- | я меня мне мной [мною]
- | ты тебя тебе тобой [тобою]
- | он его ему им [него, нему, ним]
- | она ее эи ею [нее, нэи, нею]
- | оно его ему им [него, нему, ним]
- |
- | мы нас нам нами
- | вы вас вам вами
- | они их им ими [них, ним, ними]
- |
- | себя себе собой [собою]
- |
- | demonstrative pronouns: этот (this), тот (that)
- |
- | этот эта это эти
- | этого эты это эти
- | этого этой этого этих
- | этому этой этому этим
- | этим этой этим [этою] этими
- | этом этой этом этих
- |
- | тот та то те
- | того ту то те
- | того той того тех
- | тому той тому тем
- | тем той тем [тою] теми
- | том той том тех
- |
- | determinative pronouns
- |
- | (a) весь (all)
- |
- | весь вся все все
- | всего всю все все
- | всего всей всего всех
- | всему всей всему всем
- | всем всей всем [всею] всеми
- | всем всей всем всех
- |
- | (b) сам (himself etc)
- |
- | сам сама само сами
- | самого саму само самих
- | самого самой самого самих
- | самому самой самому самим
- | самим самой самим [самою] самими
- | самом самой самом самих
- |
- | stems of verbs `to be', `to have', `to do' and modal
- |
- | быть бы буд быв есть суть
- | име
- | дел
- | мог мож мочь
- | уме
- | хоч хот
- | долж
- | можн
- | нужн
- | нельзя
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_sv.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_sv.txt
deleted file mode 100644
index 56af95d..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_sv.txt
+++ /dev/null
@@ -1,131 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Swedish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
- | Swedish stop words occasionally exhibit homonym clashes. For example
- | så = so, but also seed. These are indicated clearly below.
-
-och | and
-det | it, this/that
-att | to (with infinitive)
-i | in, at
-en | a
-jag | I
-hon | she
-som | who, that
-han | he
-på | on
-den | it, this/that
-med | with
-var | where, each
-sig | him(self) etc
-för | for
-så | so (also: seed)
-till | to
-är | is
-men | but
-ett | a
-om | if; around, about
-hade | had
-de | they, these/those
-av | of
-icke | not, no
-mig | me
-du | you
-henne | her
-då | then, when
-sin | his
-nu | now
-har | have
-inte | inte någon = no one
-hans | his
-honom | him
-skulle | 'sake'
-hennes | her
-där | there
-min | my
-man | one (pronoun)
-ej | nor
-vid | at, by, on (also: vast)
-kunde | could
-något | some etc
-från | from, off
-ut | out
-när | when
-efter | after, behind
-upp | up
-vi | we
-dem | them
-vara | be
-vad | what
-över | over
-än | than
-dig | you
-kan | can
-sina | his
-här | here
-ha | have
-mot | towards
-alla | all
-under | under (also: wonder)
-någon | some etc
-eller | or (else)
-allt | all
-mycket | much
-sedan | since
-ju | why
-denna | this/that
-själv | myself, yourself etc
-detta | this/that
-åt | to
-utan | without
-varit | was
-hur | how
-ingen | no
-mitt | my
-ni | you
-bli | to be, become
-blev | from bli
-oss | us
-din | thy
-dessa | these/those
-några | some etc
-deras | their
-blir | from bli
-mina | my
-samma | (the) same
-vilken | who, that
-er | you, your
-sådan | such a
-vår | our
-blivit | from bli
-dess | its
-inom | within
-mellan | between
-sådant | such a
-varför | why
-varje | each
-vilka | who, that
-ditt | thy
-vem | who
-vilket | who, that
-sitta | his
-sådana | such a
-vart | each
-dina | thy
-vars | whose
-vårt | our
-våra | our
-ert | your
-era | your
-vilkas | whose
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_th.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_th.txt
deleted file mode 100644
index 1415edf..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_th.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-# Thai stopwords from:
-# "Opinion Detection in Thai Political News Columns
-# Based on Subjectivity Analysis"
-# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
-ไว้
-ไม่
-ไป
-ได้
-ให้
-ใน
-โดย
-แห่ง
-แล้ว
-และ
-แรก
-แบบ
-แต่
-เอง
-เห็น
-เลย
-เริ่ม
-เรา
-เมื่อ
-เพื่อ
-เพราะ
-เป็นการ
-เป็น
-เปิดเผย
-เปิด
-เนื่องจาก
-เดียวกัน
-เดียว
-เช่น
-เฉพาะ
-เคย
-เข้า
-เขา
-อีก
-อาจ
-อะไร
-ออก
-อย่าง
-อยู่
-อยาก
-หาก
-หลาย
-หลังจาก
-หลัง
-หรือ
-หนึ่ง
-ส่วน
-ส่ง
-สุด
-สําหรับ
-ว่า
-วัน
-ลง
-ร่วม
-ราย
-รับ
-ระหว่าง
-รวม
-ยัง
-มี
-มาก
-มา
-พร้อม
-พบ
-ผ่าน
-ผล
-บาง
-น่า
-นี้
-นํา
-นั้น
-นัก
-นอกจาก
-ทุก
-ที่สุด
-ที่
-ทําให้
-ทํา
-ทาง
-ทั้งนี้
-ทั้ง
-ถ้า
-ถูก
-ถึง
-ต้อง
-ต่างๆ
-ต่าง
-ต่อ
-ตาม
-ตั้งแต่
-ตั้ง
-ด้าน
-ด้วย
-ดัง
-ซึ่ง
-ช่วง
-จึง
-จาก
-จัด
-จะ
-คือ
-ความ
-ครั้ง
-คง
-ขึ้น
-ของ
-ขอ
-ขณะ
-ก่อน
-ก็
-การ
-กับ
-กัน
-กว่า
-กล่าว
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_tr.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_tr.txt
deleted file mode 100644
index fe4083b..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/stopwords_tr.txt
+++ /dev/null
@@ -1,212 +0,0 @@
-# Turkish stopwords from LUCENE-559
-# merged with the list from "Information Retrieval on Turkish Texts"
-# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
-acaba
-altmış
-altı
-ama
-ancak
-arada
-aslında
-ayrıca
-bana
-bazı
-belki
-ben
-benden
-beni
-benim
-beri
-beş
-bile
-bin
-bir
-birçok
-biri
-birkaç
-birkez
-birşey
-birşeyi
-biz
-bize
-bizden
-bizi
-bizim
-böyle
-böylece
-bu
-buna
-bunda
-bundan
-bunlar
-bunları
-bunların
-bunu
-bunun
-burada
-çok
-çünkü
-da
-daha
-dahi
-de
-defa
-değil
-diğer
-diye
-doksan
-dokuz
-dolayı
-dolayısıyla
-dört
-edecek
-eden
-ederek
-edilecek
-ediliyor
-edilmesi
-ediyor
-eğer
-elli
-en
-etmesi
-etti
-ettiği
-ettiğini
-gibi
-göre
-halen
-hangi
-hatta
-hem
-henüz
-hep
-hepsi
-her
-herhangi
-herkesin
-hiç
-hiçbir
-için
-iki
-ile
-ilgili
-ise
-işte
-itibaren
-itibariyle
-kadar
-karşın
-katrilyon
-kendi
-kendilerine
-kendini
-kendisi
-kendisine
-kendisini
-kez
-ki
-kim
-kimden
-kime
-kimi
-kimse
-kırk
-milyar
-milyon
-mu
-mü
-mı
-nasıl
-ne
-neden
-nedenle
-nerde
-nerede
-nereye
-niye
-niçin
-o
-olan
-olarak
-oldu
-olduğu
-olduğunu
-olduklarını
-olmadı
-olmadığı
-olmak
-olması
-olmayan
-olmaz
-olsa
-olsun
-olup
-olur
-olursa
-oluyor
-on
-ona
-ondan
-onlar
-onlardan
-onları
-onların
-onu
-onun
-otuz
-oysa
-öyle
-pek
-rağmen
-sadece
-sanki
-sekiz
-seksen
-sen
-senden
-seni
-senin
-siz
-sizden
-sizi
-sizin
-şey
-şeyden
-şeyi
-şeyler
-şöyle
-şu
-şuna
-şunda
-şundan
-şunları
-şunu
-tarafından
-trilyon
-tüm
-üç
-üzere
-var
-vardı
-ve
-veya
-ya
-yani
-yapacak
-yapılan
-yapılması
-yapıyor
-yapmak
-yaptı
-yaptığı
-yaptığını
-yaptıkları
-yedi
-yerine
-yetmiş
-yine
-yirmi
-yoksa
-yüz
-zaten
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/lang/userdict_ja.txt b/src/test/resources/solr-home-4.1/collection1/conf/lang/userdict_ja.txt
deleted file mode 100644
index c79de82..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/lang/userdict_ja.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-#
-# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
-#
-# Add entries to this file in order to override the statistical model in terms
-# of segmentation, readings and part-of-speech tags. Notice that entries do
-# not have weights since they are always used when found. This is by-design
-# in order to maximize ease-of-use.
-#
-# Entries are defined using the following CSV format:
-# , ... , ... ,
-#
-# Notice that a single half-width space separates tokens and readings, and
-# that the number tokens and readings must match exactly.
-#
-# Also notice that multiple entries with the same is undefined.
-#
-# Whitespace only lines are ignored. Comments are not allowed on entry lines.
-#
-
-# Custom segmentation for kanji compounds
-日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
-関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
-
-# Custom segmentation for compound katakana
-トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
-ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
-
-# Custom reading for former sumo wrestler
-朝青龍,朝青龍,アサショウリュウ,カスタム人名
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/mapping-FoldToASCII.txt b/src/test/resources/solr-home-4.1/collection1/conf/mapping-FoldToASCII.txt
deleted file mode 100644
index 020f833..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/mapping-FoldToASCII.txt
+++ /dev/null
@@ -1,3813 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-# This map converts alphabetic, numeric, and symbolic Unicode characters
-# which are not in the first 127 ASCII characters (the "Basic Latin" Unicode
-# block) into their ASCII equivalents, if one exists.
-#
-# Characters from the following Unicode blocks are converted; however, only
-# those characters with reasonable ASCII alternatives are converted:
-#
-# - C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf
-# - Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf
-# - Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf
-# - Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf
-# - Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf
-# - Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf
-# - IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf
-# - Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf
-# - Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf
-# - General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf
-# - Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf
-# - Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf
-# - Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf
-# - Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf
-# - Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf
-# - Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf
-#
-# See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode
-#
-# The set of character conversions supported by this map is a superset of
-# those supported by the map represented by mapping-ISOLatin1Accent.txt.
-#
-# See the bottom of this file for the Perl script used to generate the contents
-# of this file (without this header) from ASCIIFoldingFilter.java.
-
-
-# Syntax:
-# "source" => "target"
-# "source".length() > 0 (source cannot be empty.)
-# "target".length() >= 0 (target can be empty.)
-
-
-# À [LATIN CAPITAL LETTER A WITH GRAVE]
-"\u00C0" => "A"
-
-# Á [LATIN CAPITAL LETTER A WITH ACUTE]
-"\u00C1" => "A"
-
-# Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
-"\u00C2" => "A"
-
-# Ã [LATIN CAPITAL LETTER A WITH TILDE]
-"\u00C3" => "A"
-
-# Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
-"\u00C4" => "A"
-
-# Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
-"\u00C5" => "A"
-
-# Ā [LATIN CAPITAL LETTER A WITH MACRON]
-"\u0100" => "A"
-
-# Ă [LATIN CAPITAL LETTER A WITH BREVE]
-"\u0102" => "A"
-
-# Ą [LATIN CAPITAL LETTER A WITH OGONEK]
-"\u0104" => "A"
-
-# Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
-"\u018F" => "A"
-
-# Ǎ [LATIN CAPITAL LETTER A WITH CARON]
-"\u01CD" => "A"
-
-# Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
-"\u01DE" => "A"
-
-# Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
-"\u01E0" => "A"
-
-# Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
-"\u01FA" => "A"
-
-# Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
-"\u0200" => "A"
-
-# Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
-"\u0202" => "A"
-
-# Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
-"\u0226" => "A"
-
-# Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
-"\u023A" => "A"
-
-# ᴀ [LATIN LETTER SMALL CAPITAL A]
-"\u1D00" => "A"
-
-# Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
-"\u1E00" => "A"
-
-# Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
-"\u1EA0" => "A"
-
-# Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
-"\u1EA2" => "A"
-
-# Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
-"\u1EA4" => "A"
-
-# Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
-"\u1EA6" => "A"
-
-# Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EA8" => "A"
-
-# Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
-"\u1EAA" => "A"
-
-# Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EAC" => "A"
-
-# Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
-"\u1EAE" => "A"
-
-# Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
-"\u1EB0" => "A"
-
-# Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
-"\u1EB2" => "A"
-
-# Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
-"\u1EB4" => "A"
-
-# Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
-"\u1EB6" => "A"
-
-# Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
-"\u24B6" => "A"
-
-# A [FULLWIDTH LATIN CAPITAL LETTER A]
-"\uFF21" => "A"
-
-# à [LATIN SMALL LETTER A WITH GRAVE]
-"\u00E0" => "a"
-
-# á [LATIN SMALL LETTER A WITH ACUTE]
-"\u00E1" => "a"
-
-# â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
-"\u00E2" => "a"
-
-# ã [LATIN SMALL LETTER A WITH TILDE]
-"\u00E3" => "a"
-
-# ä [LATIN SMALL LETTER A WITH DIAERESIS]
-"\u00E4" => "a"
-
-# å [LATIN SMALL LETTER A WITH RING ABOVE]
-"\u00E5" => "a"
-
-# ā [LATIN SMALL LETTER A WITH MACRON]
-"\u0101" => "a"
-
-# ă [LATIN SMALL LETTER A WITH BREVE]
-"\u0103" => "a"
-
-# ą [LATIN SMALL LETTER A WITH OGONEK]
-"\u0105" => "a"
-
-# ǎ [LATIN SMALL LETTER A WITH CARON]
-"\u01CE" => "a"
-
-# ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
-"\u01DF" => "a"
-
-# ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
-"\u01E1" => "a"
-
-# ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
-"\u01FB" => "a"
-
-# ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
-"\u0201" => "a"
-
-# ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
-"\u0203" => "a"
-
-# ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
-"\u0227" => "a"
-
-# ɐ [LATIN SMALL LETTER TURNED A]
-"\u0250" => "a"
-
-# ə [LATIN SMALL LETTER SCHWA]
-"\u0259" => "a"
-
-# ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
-"\u025A" => "a"
-
-# ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
-"\u1D8F" => "a"
-
-# ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
-"\u1D95" => "a"
-
-# ạ [LATIN SMALL LETTER A WITH RING BELOW]
-"\u1E01" => "a"
-
-# ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
-"\u1E9A" => "a"
-
-# ạ [LATIN SMALL LETTER A WITH DOT BELOW]
-"\u1EA1" => "a"
-
-# ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
-"\u1EA3" => "a"
-
-# ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
-"\u1EA5" => "a"
-
-# ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
-"\u1EA7" => "a"
-
-# ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EA9" => "a"
-
-# ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
-"\u1EAB" => "a"
-
-# ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EAD" => "a"
-
-# ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
-"\u1EAF" => "a"
-
-# ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
-"\u1EB1" => "a"
-
-# ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
-"\u1EB3" => "a"
-
-# ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
-"\u1EB5" => "a"
-
-# ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
-"\u1EB7" => "a"
-
-# ₐ [LATIN SUBSCRIPT SMALL LETTER A]
-"\u2090" => "a"
-
-# ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
-"\u2094" => "a"
-
-# ⓐ [CIRCLED LATIN SMALL LETTER A]
-"\u24D0" => "a"
-
-# ⱥ [LATIN SMALL LETTER A WITH STROKE]
-"\u2C65" => "a"
-
-# Ɐ [LATIN CAPITAL LETTER TURNED A]
-"\u2C6F" => "a"
-
-# a [FULLWIDTH LATIN SMALL LETTER A]
-"\uFF41" => "a"
-
-# Ꜳ [LATIN CAPITAL LETTER AA]
-"\uA732" => "AA"
-
-# Æ [LATIN CAPITAL LETTER AE]
-"\u00C6" => "AE"
-
-# Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
-"\u01E2" => "AE"
-
-# Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
-"\u01FC" => "AE"
-
-# ᴁ [LATIN LETTER SMALL CAPITAL AE]
-"\u1D01" => "AE"
-
-# Ꜵ [LATIN CAPITAL LETTER AO]
-"\uA734" => "AO"
-
-# Ꜷ [LATIN CAPITAL LETTER AU]
-"\uA736" => "AU"
-
-# Ꜹ [LATIN CAPITAL LETTER AV]
-"\uA738" => "AV"
-
-# Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
-"\uA73A" => "AV"
-
-# Ꜽ [LATIN CAPITAL LETTER AY]
-"\uA73C" => "AY"
-
-# ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
-"\u249C" => "(a)"
-
-# ꜳ [LATIN SMALL LETTER AA]
-"\uA733" => "aa"
-
-# æ [LATIN SMALL LETTER AE]
-"\u00E6" => "ae"
-
-# ǣ [LATIN SMALL LETTER AE WITH MACRON]
-"\u01E3" => "ae"
-
-# ǽ [LATIN SMALL LETTER AE WITH ACUTE]
-"\u01FD" => "ae"
-
-# ᴂ [LATIN SMALL LETTER TURNED AE]
-"\u1D02" => "ae"
-
-# ꜵ [LATIN SMALL LETTER AO]
-"\uA735" => "ao"
-
-# ꜷ [LATIN SMALL LETTER AU]
-"\uA737" => "au"
-
-# ꜹ [LATIN SMALL LETTER AV]
-"\uA739" => "av"
-
-# ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
-"\uA73B" => "av"
-
-# ꜽ [LATIN SMALL LETTER AY]
-"\uA73D" => "ay"
-
-# Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
-"\u0181" => "B"
-
-# Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
-"\u0182" => "B"
-
-# Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
-"\u0243" => "B"
-
-# ʙ [LATIN LETTER SMALL CAPITAL B]
-"\u0299" => "B"
-
-# ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
-"\u1D03" => "B"
-
-# Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
-"\u1E02" => "B"
-
-# Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
-"\u1E04" => "B"
-
-# Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
-"\u1E06" => "B"
-
-# Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
-"\u24B7" => "B"
-
-# B [FULLWIDTH LATIN CAPITAL LETTER B]
-"\uFF22" => "B"
-
-# ƀ [LATIN SMALL LETTER B WITH STROKE]
-"\u0180" => "b"
-
-# ƃ [LATIN SMALL LETTER B WITH TOPBAR]
-"\u0183" => "b"
-
-# ɓ [LATIN SMALL LETTER B WITH HOOK]
-"\u0253" => "b"
-
-# ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
-"\u1D6C" => "b"
-
-# ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
-"\u1D80" => "b"
-
-# ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
-"\u1E03" => "b"
-
-# ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
-"\u1E05" => "b"
-
-# ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
-"\u1E07" => "b"
-
-# ⓑ [CIRCLED LATIN SMALL LETTER B]
-"\u24D1" => "b"
-
-# b [FULLWIDTH LATIN SMALL LETTER B]
-"\uFF42" => "b"
-
-# ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
-"\u249D" => "(b)"
-
-# Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
-"\u00C7" => "C"
-
-# Ć [LATIN CAPITAL LETTER C WITH ACUTE]
-"\u0106" => "C"
-
-# Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
-"\u0108" => "C"
-
-# Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
-"\u010A" => "C"
-
-# Č [LATIN CAPITAL LETTER C WITH CARON]
-"\u010C" => "C"
-
-# Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
-"\u0187" => "C"
-
-# Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
-"\u023B" => "C"
-
-# ʗ [LATIN LETTER STRETCHED C]
-"\u0297" => "C"
-
-# ᴄ [LATIN LETTER SMALL CAPITAL C]
-"\u1D04" => "C"
-
-# Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
-"\u1E08" => "C"
-
-# Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
-"\u24B8" => "C"
-
-# C [FULLWIDTH LATIN CAPITAL LETTER C]
-"\uFF23" => "C"
-
-# ç [LATIN SMALL LETTER C WITH CEDILLA]
-"\u00E7" => "c"
-
-# ć [LATIN SMALL LETTER C WITH ACUTE]
-"\u0107" => "c"
-
-# ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
-"\u0109" => "c"
-
-# ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
-"\u010B" => "c"
-
-# č [LATIN SMALL LETTER C WITH CARON]
-"\u010D" => "c"
-
-# ƈ [LATIN SMALL LETTER C WITH HOOK]
-"\u0188" => "c"
-
-# ȼ [LATIN SMALL LETTER C WITH STROKE]
-"\u023C" => "c"
-
-# ɕ [LATIN SMALL LETTER C WITH CURL]
-"\u0255" => "c"
-
-# ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
-"\u1E09" => "c"
-
-# ↄ [LATIN SMALL LETTER REVERSED C]
-"\u2184" => "c"
-
-# ⓒ [CIRCLED LATIN SMALL LETTER C]
-"\u24D2" => "c"
-
-# Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
-"\uA73E" => "c"
-
-# ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
-"\uA73F" => "c"
-
-# c [FULLWIDTH LATIN SMALL LETTER C]
-"\uFF43" => "c"
-
-# ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
-"\u249E" => "(c)"
-
-# Ð [LATIN CAPITAL LETTER ETH]
-"\u00D0" => "D"
-
-# Ď [LATIN CAPITAL LETTER D WITH CARON]
-"\u010E" => "D"
-
-# Đ [LATIN CAPITAL LETTER D WITH STROKE]
-"\u0110" => "D"
-
-# Ɖ [LATIN CAPITAL LETTER AFRICAN D]
-"\u0189" => "D"
-
-# Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
-"\u018A" => "D"
-
-# Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
-"\u018B" => "D"
-
-# ᴅ [LATIN LETTER SMALL CAPITAL D]
-"\u1D05" => "D"
-
-# ᴆ [LATIN LETTER SMALL CAPITAL ETH]
-"\u1D06" => "D"
-
-# Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
-"\u1E0A" => "D"
-
-# Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
-"\u1E0C" => "D"
-
-# Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
-"\u1E0E" => "D"
-
-# Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
-"\u1E10" => "D"
-
-# Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
-"\u1E12" => "D"
-
-# Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
-"\u24B9" => "D"
-
-# Ꝺ [LATIN CAPITAL LETTER INSULAR D]
-"\uA779" => "D"
-
-# D [FULLWIDTH LATIN CAPITAL LETTER D]
-"\uFF24" => "D"
-
-# ð [LATIN SMALL LETTER ETH]
-"\u00F0" => "d"
-
-# ď [LATIN SMALL LETTER D WITH CARON]
-"\u010F" => "d"
-
-# đ [LATIN SMALL LETTER D WITH STROKE]
-"\u0111" => "d"
-
-# ƌ [LATIN SMALL LETTER D WITH TOPBAR]
-"\u018C" => "d"
-
-# ȡ [LATIN SMALL LETTER D WITH CURL]
-"\u0221" => "d"
-
-# ɖ [LATIN SMALL LETTER D WITH TAIL]
-"\u0256" => "d"
-
-# ɗ [LATIN SMALL LETTER D WITH HOOK]
-"\u0257" => "d"
-
-# ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
-"\u1D6D" => "d"
-
-# ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
-"\u1D81" => "d"
-
-# ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
-"\u1D91" => "d"
-
-# ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
-"\u1E0B" => "d"
-
-# ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
-"\u1E0D" => "d"
-
-# ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
-"\u1E0F" => "d"
-
-# ḑ [LATIN SMALL LETTER D WITH CEDILLA]
-"\u1E11" => "d"
-
-# ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
-"\u1E13" => "d"
-
-# ⓓ [CIRCLED LATIN SMALL LETTER D]
-"\u24D3" => "d"
-
-# ꝺ [LATIN SMALL LETTER INSULAR D]
-"\uA77A" => "d"
-
-# d [FULLWIDTH LATIN SMALL LETTER D]
-"\uFF44" => "d"
-
-# DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
-"\u01C4" => "DZ"
-
-# DZ [LATIN CAPITAL LETTER DZ]
-"\u01F1" => "DZ"
-
-# Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
-"\u01C5" => "Dz"
-
-# Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
-"\u01F2" => "Dz"
-
-# ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
-"\u249F" => "(d)"
-
-# ȸ [LATIN SMALL LETTER DB DIGRAPH]
-"\u0238" => "db"
-
-# dž [LATIN SMALL LETTER DZ WITH CARON]
-"\u01C6" => "dz"
-
-# dz [LATIN SMALL LETTER DZ]
-"\u01F3" => "dz"
-
-# ʣ [LATIN SMALL LETTER DZ DIGRAPH]
-"\u02A3" => "dz"
-
-# ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
-"\u02A5" => "dz"
-
-# È [LATIN CAPITAL LETTER E WITH GRAVE]
-"\u00C8" => "E"
-
-# É [LATIN CAPITAL LETTER E WITH ACUTE]
-"\u00C9" => "E"
-
-# Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
-"\u00CA" => "E"
-
-# Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
-"\u00CB" => "E"
-
-# Ē [LATIN CAPITAL LETTER E WITH MACRON]
-"\u0112" => "E"
-
-# Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
-"\u0114" => "E"
-
-# Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
-"\u0116" => "E"
-
-# Ę [LATIN CAPITAL LETTER E WITH OGONEK]
-"\u0118" => "E"
-
-# Ě [LATIN CAPITAL LETTER E WITH CARON]
-"\u011A" => "E"
-
-# Ǝ [LATIN CAPITAL LETTER REVERSED E]
-"\u018E" => "E"
-
-# Ɛ [LATIN CAPITAL LETTER OPEN E]
-"\u0190" => "E"
-
-# Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
-"\u0204" => "E"
-
-# Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
-"\u0206" => "E"
-
-# Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
-"\u0228" => "E"
-
-# Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
-"\u0246" => "E"
-
-# ᴇ [LATIN LETTER SMALL CAPITAL E]
-"\u1D07" => "E"
-
-# Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
-"\u1E14" => "E"
-
-# Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
-"\u1E16" => "E"
-
-# Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
-"\u1E18" => "E"
-
-# Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
-"\u1E1A" => "E"
-
-# Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
-"\u1E1C" => "E"
-
-# Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
-"\u1EB8" => "E"
-
-# Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
-"\u1EBA" => "E"
-
-# Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
-"\u1EBC" => "E"
-
-# Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
-"\u1EBE" => "E"
-
-# Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
-"\u1EC0" => "E"
-
-# Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EC2" => "E"
-
-# Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
-"\u1EC4" => "E"
-
-# Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EC6" => "E"
-
-# Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
-"\u24BA" => "E"
-
-# ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
-"\u2C7B" => "E"
-
-# E [FULLWIDTH LATIN CAPITAL LETTER E]
-"\uFF25" => "E"
-
-# è [LATIN SMALL LETTER E WITH GRAVE]
-"\u00E8" => "e"
-
-# é [LATIN SMALL LETTER E WITH ACUTE]
-"\u00E9" => "e"
-
-# ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
-"\u00EA" => "e"
-
-# ë [LATIN SMALL LETTER E WITH DIAERESIS]
-"\u00EB" => "e"
-
-# ē [LATIN SMALL LETTER E WITH MACRON]
-"\u0113" => "e"
-
-# ĕ [LATIN SMALL LETTER E WITH BREVE]
-"\u0115" => "e"
-
-# ė [LATIN SMALL LETTER E WITH DOT ABOVE]
-"\u0117" => "e"
-
-# ę [LATIN SMALL LETTER E WITH OGONEK]
-"\u0119" => "e"
-
-# ě [LATIN SMALL LETTER E WITH CARON]
-"\u011B" => "e"
-
-# ǝ [LATIN SMALL LETTER TURNED E]
-"\u01DD" => "e"
-
-# ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
-"\u0205" => "e"
-
-# ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
-"\u0207" => "e"
-
-# ȩ [LATIN SMALL LETTER E WITH CEDILLA]
-"\u0229" => "e"
-
-# ɇ [LATIN SMALL LETTER E WITH STROKE]
-"\u0247" => "e"
-
-# ɘ [LATIN SMALL LETTER REVERSED E]
-"\u0258" => "e"
-
-# ɛ [LATIN SMALL LETTER OPEN E]
-"\u025B" => "e"
-
-# ɜ [LATIN SMALL LETTER REVERSED OPEN E]
-"\u025C" => "e"
-
-# ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
-"\u025D" => "e"
-
-# ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
-"\u025E" => "e"
-
-# ʚ [LATIN SMALL LETTER CLOSED OPEN E]
-"\u029A" => "e"
-
-# ᴈ [LATIN SMALL LETTER TURNED OPEN E]
-"\u1D08" => "e"
-
-# ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
-"\u1D92" => "e"
-
-# ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
-"\u1D93" => "e"
-
-# ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
-"\u1D94" => "e"
-
-# ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
-"\u1E15" => "e"
-
-# ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
-"\u1E17" => "e"
-
-# ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
-"\u1E19" => "e"
-
-# ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
-"\u1E1B" => "e"
-
-# ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
-"\u1E1D" => "e"
-
-# ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
-"\u1EB9" => "e"
-
-# ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
-"\u1EBB" => "e"
-
-# ẽ [LATIN SMALL LETTER E WITH TILDE]
-"\u1EBD" => "e"
-
-# ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
-"\u1EBF" => "e"
-
-# ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
-"\u1EC1" => "e"
-
-# ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1EC3" => "e"
-
-# ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
-"\u1EC5" => "e"
-
-# ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
-"\u1EC7" => "e"
-
-# ₑ [LATIN SUBSCRIPT SMALL LETTER E]
-"\u2091" => "e"
-
-# ⓔ [CIRCLED LATIN SMALL LETTER E]
-"\u24D4" => "e"
-
-# ⱸ [LATIN SMALL LETTER E WITH NOTCH]
-"\u2C78" => "e"
-
-# e [FULLWIDTH LATIN SMALL LETTER E]
-"\uFF45" => "e"
-
-# ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
-"\u24A0" => "(e)"
-
-# Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
-"\u0191" => "F"
-
-# Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
-"\u1E1E" => "F"
-
-# Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
-"\u24BB" => "F"
-
-# ꜰ [LATIN LETTER SMALL CAPITAL F]
-"\uA730" => "F"
-
-# Ꝼ [LATIN CAPITAL LETTER INSULAR F]
-"\uA77B" => "F"
-
-# ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
-"\uA7FB" => "F"
-
-# F [FULLWIDTH LATIN CAPITAL LETTER F]
-"\uFF26" => "F"
-
-# ƒ [LATIN SMALL LETTER F WITH HOOK]
-"\u0192" => "f"
-
-# ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
-"\u1D6E" => "f"
-
-# ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
-"\u1D82" => "f"
-
-# ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
-"\u1E1F" => "f"
-
-# ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
-"\u1E9B" => "f"
-
-# ⓕ [CIRCLED LATIN SMALL LETTER F]
-"\u24D5" => "f"
-
-# ꝼ [LATIN SMALL LETTER INSULAR F]
-"\uA77C" => "f"
-
-# f [FULLWIDTH LATIN SMALL LETTER F]
-"\uFF46" => "f"
-
-# ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
-"\u24A1" => "(f)"
-
-# ff [LATIN SMALL LIGATURE FF]
-"\uFB00" => "ff"
-
-# ffi [LATIN SMALL LIGATURE FFI]
-"\uFB03" => "ffi"
-
-# ffl [LATIN SMALL LIGATURE FFL]
-"\uFB04" => "ffl"
-
-# fi [LATIN SMALL LIGATURE FI]
-"\uFB01" => "fi"
-
-# fl [LATIN SMALL LIGATURE FL]
-"\uFB02" => "fl"
-
-# Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
-"\u011C" => "G"
-
-# Ğ [LATIN CAPITAL LETTER G WITH BREVE]
-"\u011E" => "G"
-
-# Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
-"\u0120" => "G"
-
-# Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
-"\u0122" => "G"
-
-# Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
-"\u0193" => "G"
-
-# Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
-"\u01E4" => "G"
-
-# ǥ [LATIN SMALL LETTER G WITH STROKE]
-"\u01E5" => "G"
-
-# Ǧ [LATIN CAPITAL LETTER G WITH CARON]
-"\u01E6" => "G"
-
-# ǧ [LATIN SMALL LETTER G WITH CARON]
-"\u01E7" => "G"
-
-# Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
-"\u01F4" => "G"
-
-# ɢ [LATIN LETTER SMALL CAPITAL G]
-"\u0262" => "G"
-
-# ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
-"\u029B" => "G"
-
-# Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
-"\u1E20" => "G"
-
-# Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
-"\u24BC" => "G"
-
-# Ᵹ [LATIN CAPITAL LETTER INSULAR G]
-"\uA77D" => "G"
-
-# Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
-"\uA77E" => "G"
-
-# G [FULLWIDTH LATIN CAPITAL LETTER G]
-"\uFF27" => "G"
-
-# ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
-"\u011D" => "g"
-
-# ğ [LATIN SMALL LETTER G WITH BREVE]
-"\u011F" => "g"
-
-# ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
-"\u0121" => "g"
-
-# ģ [LATIN SMALL LETTER G WITH CEDILLA]
-"\u0123" => "g"
-
-# ǵ [LATIN SMALL LETTER G WITH ACUTE]
-"\u01F5" => "g"
-
-# ɠ [LATIN SMALL LETTER G WITH HOOK]
-"\u0260" => "g"
-
-# ɡ [LATIN SMALL LETTER SCRIPT G]
-"\u0261" => "g"
-
-# ᵷ [LATIN SMALL LETTER TURNED G]
-"\u1D77" => "g"
-
-# ᵹ [LATIN SMALL LETTER INSULAR G]
-"\u1D79" => "g"
-
-# ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
-"\u1D83" => "g"
-
-# ḡ [LATIN SMALL LETTER G WITH MACRON]
-"\u1E21" => "g"
-
-# ⓖ [CIRCLED LATIN SMALL LETTER G]
-"\u24D6" => "g"
-
-# ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
-"\uA77F" => "g"
-
-# g [FULLWIDTH LATIN SMALL LETTER G]
-"\uFF47" => "g"
-
-# ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
-"\u24A2" => "(g)"
-
-# Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
-"\u0124" => "H"
-
-# Ħ [LATIN CAPITAL LETTER H WITH STROKE]
-"\u0126" => "H"
-
-# Ȟ [LATIN CAPITAL LETTER H WITH CARON]
-"\u021E" => "H"
-
-# ʜ [LATIN LETTER SMALL CAPITAL H]
-"\u029C" => "H"
-
-# Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
-"\u1E22" => "H"
-
-# Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
-"\u1E24" => "H"
-
-# Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
-"\u1E26" => "H"
-
-# Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
-"\u1E28" => "H"
-
-# Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
-"\u1E2A" => "H"
-
-# Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
-"\u24BD" => "H"
-
-# Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
-"\u2C67" => "H"
-
-# Ⱶ [LATIN CAPITAL LETTER HALF H]
-"\u2C75" => "H"
-
-# H [FULLWIDTH LATIN CAPITAL LETTER H]
-"\uFF28" => "H"
-
-# ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
-"\u0125" => "h"
-
-# ħ [LATIN SMALL LETTER H WITH STROKE]
-"\u0127" => "h"
-
-# ȟ [LATIN SMALL LETTER H WITH CARON]
-"\u021F" => "h"
-
-# ɥ [LATIN SMALL LETTER TURNED H]
-"\u0265" => "h"
-
-# ɦ [LATIN SMALL LETTER H WITH HOOK]
-"\u0266" => "h"
-
-# ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
-"\u02AE" => "h"
-
-# ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
-"\u02AF" => "h"
-
-# ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
-"\u1E23" => "h"
-
-# ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
-"\u1E25" => "h"
-
-# ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
-"\u1E27" => "h"
-
-# ḩ [LATIN SMALL LETTER H WITH CEDILLA]
-"\u1E29" => "h"
-
-# ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
-"\u1E2B" => "h"
-
-# ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
-"\u1E96" => "h"
-
-# ⓗ [CIRCLED LATIN SMALL LETTER H]
-"\u24D7" => "h"
-
-# ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
-"\u2C68" => "h"
-
-# ⱶ [LATIN SMALL LETTER HALF H]
-"\u2C76" => "h"
-
-# h [FULLWIDTH LATIN SMALL LETTER H]
-"\uFF48" => "h"
-
-# Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
-"\u01F6" => "HV"
-
-# ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
-"\u24A3" => "(h)"
-
-# ƕ [LATIN SMALL LETTER HV]
-"\u0195" => "hv"
-
-# Ì [LATIN CAPITAL LETTER I WITH GRAVE]
-"\u00CC" => "I"
-
-# Í [LATIN CAPITAL LETTER I WITH ACUTE]
-"\u00CD" => "I"
-
-# Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
-"\u00CE" => "I"
-
-# Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
-"\u00CF" => "I"
-
-# Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
-"\u0128" => "I"
-
-# Ī [LATIN CAPITAL LETTER I WITH MACRON]
-"\u012A" => "I"
-
-# Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
-"\u012C" => "I"
-
-# Į [LATIN CAPITAL LETTER I WITH OGONEK]
-"\u012E" => "I"
-
-# İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
-"\u0130" => "I"
-
-# Ɩ [LATIN CAPITAL LETTER IOTA]
-"\u0196" => "I"
-
-# Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
-"\u0197" => "I"
-
-# Ǐ [LATIN CAPITAL LETTER I WITH CARON]
-"\u01CF" => "I"
-
-# Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
-"\u0208" => "I"
-
-# Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
-"\u020A" => "I"
-
-# ɪ [LATIN LETTER SMALL CAPITAL I]
-"\u026A" => "I"
-
-# ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
-"\u1D7B" => "I"
-
-# Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
-"\u1E2C" => "I"
-
-# Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
-"\u1E2E" => "I"
-
-# Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
-"\u1EC8" => "I"
-
-# Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
-"\u1ECA" => "I"
-
-# Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
-"\u24BE" => "I"
-
-# ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
-"\uA7FE" => "I"
-
-# I [FULLWIDTH LATIN CAPITAL LETTER I]
-"\uFF29" => "I"
-
-# ì [LATIN SMALL LETTER I WITH GRAVE]
-"\u00EC" => "i"
-
-# í [LATIN SMALL LETTER I WITH ACUTE]
-"\u00ED" => "i"
-
-# î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
-"\u00EE" => "i"
-
-# ï [LATIN SMALL LETTER I WITH DIAERESIS]
-"\u00EF" => "i"
-
-# ĩ [LATIN SMALL LETTER I WITH TILDE]
-"\u0129" => "i"
-
-# ī [LATIN SMALL LETTER I WITH MACRON]
-"\u012B" => "i"
-
-# ĭ [LATIN SMALL LETTER I WITH BREVE]
-"\u012D" => "i"
-
-# į [LATIN SMALL LETTER I WITH OGONEK]
-"\u012F" => "i"
-
-# ı [LATIN SMALL LETTER DOTLESS I]
-"\u0131" => "i"
-
-# ǐ [LATIN SMALL LETTER I WITH CARON]
-"\u01D0" => "i"
-
-# ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
-"\u0209" => "i"
-
-# ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
-"\u020B" => "i"
-
-# ɨ [LATIN SMALL LETTER I WITH STROKE]
-"\u0268" => "i"
-
-# ᴉ [LATIN SMALL LETTER TURNED I]
-"\u1D09" => "i"
-
-# ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
-"\u1D62" => "i"
-
-# ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
-"\u1D7C" => "i"
-
-# ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
-"\u1D96" => "i"
-
-# ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
-"\u1E2D" => "i"
-
-# ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
-"\u1E2F" => "i"
-
-# ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
-"\u1EC9" => "i"
-
-# ị [LATIN SMALL LETTER I WITH DOT BELOW]
-"\u1ECB" => "i"
-
-# ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
-"\u2071" => "i"
-
-# ⓘ [CIRCLED LATIN SMALL LETTER I]
-"\u24D8" => "i"
-
-# i [FULLWIDTH LATIN SMALL LETTER I]
-"\uFF49" => "i"
-
-# IJ [LATIN CAPITAL LIGATURE IJ]
-"\u0132" => "IJ"
-
-# ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
-"\u24A4" => "(i)"
-
-# ij [LATIN SMALL LIGATURE IJ]
-"\u0133" => "ij"
-
-# Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
-"\u0134" => "J"
-
-# Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
-"\u0248" => "J"
-
-# ᴊ [LATIN LETTER SMALL CAPITAL J]
-"\u1D0A" => "J"
-
-# Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
-"\u24BF" => "J"
-
-# J [FULLWIDTH LATIN CAPITAL LETTER J]
-"\uFF2A" => "J"
-
-# ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
-"\u0135" => "j"
-
-# ǰ [LATIN SMALL LETTER J WITH CARON]
-"\u01F0" => "j"
-
-# ȷ [LATIN SMALL LETTER DOTLESS J]
-"\u0237" => "j"
-
-# ɉ [LATIN SMALL LETTER J WITH STROKE]
-"\u0249" => "j"
-
-# ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
-"\u025F" => "j"
-
-# ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
-"\u0284" => "j"
-
-# ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
-"\u029D" => "j"
-
-# ⓙ [CIRCLED LATIN SMALL LETTER J]
-"\u24D9" => "j"
-
-# ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
-"\u2C7C" => "j"
-
-# j [FULLWIDTH LATIN SMALL LETTER J]
-"\uFF4A" => "j"
-
-# ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
-"\u24A5" => "(j)"
-
-# Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
-"\u0136" => "K"
-
-# Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
-"\u0198" => "K"
-
-# Ǩ [LATIN CAPITAL LETTER K WITH CARON]
-"\u01E8" => "K"
-
-# ᴋ [LATIN LETTER SMALL CAPITAL K]
-"\u1D0B" => "K"
-
-# Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
-"\u1E30" => "K"
-
-# Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
-"\u1E32" => "K"
-
-# Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
-"\u1E34" => "K"
-
-# Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
-"\u24C0" => "K"
-
-# Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
-"\u2C69" => "K"
-
-# Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
-"\uA740" => "K"
-
-# Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
-"\uA742" => "K"
-
-# Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
-"\uA744" => "K"
-
-# K [FULLWIDTH LATIN CAPITAL LETTER K]
-"\uFF2B" => "K"
-
-# ķ [LATIN SMALL LETTER K WITH CEDILLA]
-"\u0137" => "k"
-
-# ƙ [LATIN SMALL LETTER K WITH HOOK]
-"\u0199" => "k"
-
-# ǩ [LATIN SMALL LETTER K WITH CARON]
-"\u01E9" => "k"
-
-# ʞ [LATIN SMALL LETTER TURNED K]
-"\u029E" => "k"
-
-# ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
-"\u1D84" => "k"
-
-# ḱ [LATIN SMALL LETTER K WITH ACUTE]
-"\u1E31" => "k"
-
-# ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
-"\u1E33" => "k"
-
-# ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
-"\u1E35" => "k"
-
-# ⓚ [CIRCLED LATIN SMALL LETTER K]
-"\u24DA" => "k"
-
-# ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
-"\u2C6A" => "k"
-
-# ꝁ [LATIN SMALL LETTER K WITH STROKE]
-"\uA741" => "k"
-
-# ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
-"\uA743" => "k"
-
-# ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
-"\uA745" => "k"
-
-# k [FULLWIDTH LATIN SMALL LETTER K]
-"\uFF4B" => "k"
-
-# ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
-"\u24A6" => "(k)"
-
-# Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
-"\u0139" => "L"
-
-# Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
-"\u013B" => "L"
-
-# Ľ [LATIN CAPITAL LETTER L WITH CARON]
-"\u013D" => "L"
-
-# Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
-"\u013F" => "L"
-
-# Ł [LATIN CAPITAL LETTER L WITH STROKE]
-"\u0141" => "L"
-
-# Ƚ [LATIN CAPITAL LETTER L WITH BAR]
-"\u023D" => "L"
-
-# ʟ [LATIN LETTER SMALL CAPITAL L]
-"\u029F" => "L"
-
-# ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
-"\u1D0C" => "L"
-
-# Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
-"\u1E36" => "L"
-
-# Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
-"\u1E38" => "L"
-
-# Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
-"\u1E3A" => "L"
-
-# Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
-"\u1E3C" => "L"
-
-# Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
-"\u24C1" => "L"
-
-# Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
-"\u2C60" => "L"
-
-# Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
-"\u2C62" => "L"
-
-# Ꝇ [LATIN CAPITAL LETTER BROKEN L]
-"\uA746" => "L"
-
-# Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
-"\uA748" => "L"
-
-# Ꞁ [LATIN CAPITAL LETTER TURNED L]
-"\uA780" => "L"
-
-# L [FULLWIDTH LATIN CAPITAL LETTER L]
-"\uFF2C" => "L"
-
-# ĺ [LATIN SMALL LETTER L WITH ACUTE]
-"\u013A" => "l"
-
-# ļ [LATIN SMALL LETTER L WITH CEDILLA]
-"\u013C" => "l"
-
-# ľ [LATIN SMALL LETTER L WITH CARON]
-"\u013E" => "l"
-
-# ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
-"\u0140" => "l"
-
-# ł [LATIN SMALL LETTER L WITH STROKE]
-"\u0142" => "l"
-
-# ƚ [LATIN SMALL LETTER L WITH BAR]
-"\u019A" => "l"
-
-# ȴ [LATIN SMALL LETTER L WITH CURL]
-"\u0234" => "l"
-
-# ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
-"\u026B" => "l"
-
-# ɬ [LATIN SMALL LETTER L WITH BELT]
-"\u026C" => "l"
-
-# ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
-"\u026D" => "l"
-
-# ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
-"\u1D85" => "l"
-
-# ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
-"\u1E37" => "l"
-
-# ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
-"\u1E39" => "l"
-
-# ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
-"\u1E3B" => "l"
-
-# ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
-"\u1E3D" => "l"
-
-# ⓛ [CIRCLED LATIN SMALL LETTER L]
-"\u24DB" => "l"
-
-# ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
-"\u2C61" => "l"
-
-# ꝇ [LATIN SMALL LETTER BROKEN L]
-"\uA747" => "l"
-
-# ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
-"\uA749" => "l"
-
-# ꞁ [LATIN SMALL LETTER TURNED L]
-"\uA781" => "l"
-
-# l [FULLWIDTH LATIN SMALL LETTER L]
-"\uFF4C" => "l"
-
-# LJ [LATIN CAPITAL LETTER LJ]
-"\u01C7" => "LJ"
-
-# Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
-"\u1EFA" => "LL"
-
-# Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
-"\u01C8" => "Lj"
-
-# ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
-"\u24A7" => "(l)"
-
-# lj [LATIN SMALL LETTER LJ]
-"\u01C9" => "lj"
-
-# ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
-"\u1EFB" => "ll"
-
-# ʪ [LATIN SMALL LETTER LS DIGRAPH]
-"\u02AA" => "ls"
-
-# ʫ [LATIN SMALL LETTER LZ DIGRAPH]
-"\u02AB" => "lz"
-
-# Ɯ [LATIN CAPITAL LETTER TURNED M]
-"\u019C" => "M"
-
-# ᴍ [LATIN LETTER SMALL CAPITAL M]
-"\u1D0D" => "M"
-
-# Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
-"\u1E3E" => "M"
-
-# Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
-"\u1E40" => "M"
-
-# Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
-"\u1E42" => "M"
-
-# Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
-"\u24C2" => "M"
-
-# Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
-"\u2C6E" => "M"
-
-# ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
-"\uA7FD" => "M"
-
-# ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
-"\uA7FF" => "M"
-
-# M [FULLWIDTH LATIN CAPITAL LETTER M]
-"\uFF2D" => "M"
-
-# ɯ [LATIN SMALL LETTER TURNED M]
-"\u026F" => "m"
-
-# ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
-"\u0270" => "m"
-
-# ɱ [LATIN SMALL LETTER M WITH HOOK]
-"\u0271" => "m"
-
-# ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
-"\u1D6F" => "m"
-
-# ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
-"\u1D86" => "m"
-
-# ḿ [LATIN SMALL LETTER M WITH ACUTE]
-"\u1E3F" => "m"
-
-# ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
-"\u1E41" => "m"
-
-# ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
-"\u1E43" => "m"
-
-# ⓜ [CIRCLED LATIN SMALL LETTER M]
-"\u24DC" => "m"
-
-# m [FULLWIDTH LATIN SMALL LETTER M]
-"\uFF4D" => "m"
-
-# ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
-"\u24A8" => "(m)"
-
-# Ñ [LATIN CAPITAL LETTER N WITH TILDE]
-"\u00D1" => "N"
-
-# Ń [LATIN CAPITAL LETTER N WITH ACUTE]
-"\u0143" => "N"
-
-# Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
-"\u0145" => "N"
-
-# Ň [LATIN CAPITAL LETTER N WITH CARON]
-"\u0147" => "N"
-
-# Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
-"\u014A" => "N"
-
-# Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
-"\u019D" => "N"
-
-# Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
-"\u01F8" => "N"
-
-# Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
-"\u0220" => "N"
-
-# ɴ [LATIN LETTER SMALL CAPITAL N]
-"\u0274" => "N"
-
-# ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
-"\u1D0E" => "N"
-
-# Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
-"\u1E44" => "N"
-
-# Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
-"\u1E46" => "N"
-
-# Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
-"\u1E48" => "N"
-
-# Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
-"\u1E4A" => "N"
-
-# Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
-"\u24C3" => "N"
-
-# N [FULLWIDTH LATIN CAPITAL LETTER N]
-"\uFF2E" => "N"
-
-# ñ [LATIN SMALL LETTER N WITH TILDE]
-"\u00F1" => "n"
-
-# ń [LATIN SMALL LETTER N WITH ACUTE]
-"\u0144" => "n"
-
-# ņ [LATIN SMALL LETTER N WITH CEDILLA]
-"\u0146" => "n"
-
-# ň [LATIN SMALL LETTER N WITH CARON]
-"\u0148" => "n"
-
-# ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
-"\u0149" => "n"
-
-# ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
-"\u014B" => "n"
-
-# ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
-"\u019E" => "n"
-
-# ǹ [LATIN SMALL LETTER N WITH GRAVE]
-"\u01F9" => "n"
-
-# ȵ [LATIN SMALL LETTER N WITH CURL]
-"\u0235" => "n"
-
-# ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
-"\u0272" => "n"
-
-# ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
-"\u0273" => "n"
-
-# ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
-"\u1D70" => "n"
-
-# ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
-"\u1D87" => "n"
-
-# ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
-"\u1E45" => "n"
-
-# ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
-"\u1E47" => "n"
-
-# ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
-"\u1E49" => "n"
-
-# ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
-"\u1E4B" => "n"
-
-# ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
-"\u207F" => "n"
-
-# ⓝ [CIRCLED LATIN SMALL LETTER N]
-"\u24DD" => "n"
-
-# n [FULLWIDTH LATIN SMALL LETTER N]
-"\uFF4E" => "n"
-
-# NJ [LATIN CAPITAL LETTER NJ]
-"\u01CA" => "NJ"
-
-# Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
-"\u01CB" => "Nj"
-
-# ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
-"\u24A9" => "(n)"
-
-# nj [LATIN SMALL LETTER NJ]
-"\u01CC" => "nj"
-
-# Ò [LATIN CAPITAL LETTER O WITH GRAVE]
-"\u00D2" => "O"
-
-# Ó [LATIN CAPITAL LETTER O WITH ACUTE]
-"\u00D3" => "O"
-
-# Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
-"\u00D4" => "O"
-
-# Õ [LATIN CAPITAL LETTER O WITH TILDE]
-"\u00D5" => "O"
-
-# Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
-"\u00D6" => "O"
-
-# Ø [LATIN CAPITAL LETTER O WITH STROKE]
-"\u00D8" => "O"
-
-# Ō [LATIN CAPITAL LETTER O WITH MACRON]
-"\u014C" => "O"
-
-# Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
-"\u014E" => "O"
-
-# Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
-"\u0150" => "O"
-
-# Ɔ [LATIN CAPITAL LETTER OPEN O]
-"\u0186" => "O"
-
-# Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
-"\u019F" => "O"
-
-# Ơ [LATIN CAPITAL LETTER O WITH HORN]
-"\u01A0" => "O"
-
-# Ǒ [LATIN CAPITAL LETTER O WITH CARON]
-"\u01D1" => "O"
-
-# Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
-"\u01EA" => "O"
-
-# Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
-"\u01EC" => "O"
-
-# Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
-"\u01FE" => "O"
-
-# Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
-"\u020C" => "O"
-
-# Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
-"\u020E" => "O"
-
-# Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
-"\u022A" => "O"
-
-# Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
-"\u022C" => "O"
-
-# Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
-"\u022E" => "O"
-
-# Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
-"\u0230" => "O"
-
-# ᴏ [LATIN LETTER SMALL CAPITAL O]
-"\u1D0F" => "O"
-
-# ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
-"\u1D10" => "O"
-
-# Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
-"\u1E4C" => "O"
-
-# Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
-"\u1E4E" => "O"
-
-# Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
-"\u1E50" => "O"
-
-# Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
-"\u1E52" => "O"
-
-# Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
-"\u1ECC" => "O"
-
-# Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
-"\u1ECE" => "O"
-
-# Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
-"\u1ED0" => "O"
-
-# Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
-"\u1ED2" => "O"
-
-# Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1ED4" => "O"
-
-# Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
-"\u1ED6" => "O"
-
-# Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
-"\u1ED8" => "O"
-
-# Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
-"\u1EDA" => "O"
-
-# Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
-"\u1EDC" => "O"
-
-# Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
-"\u1EDE" => "O"
-
-# Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
-"\u1EE0" => "O"
-
-# Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
-"\u1EE2" => "O"
-
-# Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
-"\u24C4" => "O"
-
-# Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
-"\uA74A" => "O"
-
-# Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
-"\uA74C" => "O"
-
-# O [FULLWIDTH LATIN CAPITAL LETTER O]
-"\uFF2F" => "O"
-
-# ò [LATIN SMALL LETTER O WITH GRAVE]
-"\u00F2" => "o"
-
-# ó [LATIN SMALL LETTER O WITH ACUTE]
-"\u00F3" => "o"
-
-# ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
-"\u00F4" => "o"
-
-# õ [LATIN SMALL LETTER O WITH TILDE]
-"\u00F5" => "o"
-
-# ö [LATIN SMALL LETTER O WITH DIAERESIS]
-"\u00F6" => "o"
-
-# ø [LATIN SMALL LETTER O WITH STROKE]
-"\u00F8" => "o"
-
-# ō [LATIN SMALL LETTER O WITH MACRON]
-"\u014D" => "o"
-
-# ŏ [LATIN SMALL LETTER O WITH BREVE]
-"\u014F" => "o"
-
-# ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
-"\u0151" => "o"
-
-# ơ [LATIN SMALL LETTER O WITH HORN]
-"\u01A1" => "o"
-
-# ǒ [LATIN SMALL LETTER O WITH CARON]
-"\u01D2" => "o"
-
-# ǫ [LATIN SMALL LETTER O WITH OGONEK]
-"\u01EB" => "o"
-
-# ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
-"\u01ED" => "o"
-
-# ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
-"\u01FF" => "o"
-
-# ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
-"\u020D" => "o"
-
-# ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
-"\u020F" => "o"
-
-# ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
-"\u022B" => "o"
-
-# ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
-"\u022D" => "o"
-
-# ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
-"\u022F" => "o"
-
-# ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
-"\u0231" => "o"
-
-# ɔ [LATIN SMALL LETTER OPEN O]
-"\u0254" => "o"
-
-# ɵ [LATIN SMALL LETTER BARRED O]
-"\u0275" => "o"
-
-# ᴖ [LATIN SMALL LETTER TOP HALF O]
-"\u1D16" => "o"
-
-# ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
-"\u1D17" => "o"
-
-# ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
-"\u1D97" => "o"
-
-# ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
-"\u1E4D" => "o"
-
-# ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
-"\u1E4F" => "o"
-
-# ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
-"\u1E51" => "o"
-
-# ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
-"\u1E53" => "o"
-
-# ọ [LATIN SMALL LETTER O WITH DOT BELOW]
-"\u1ECD" => "o"
-
-# ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
-"\u1ECF" => "o"
-
-# ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
-"\u1ED1" => "o"
-
-# ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
-"\u1ED3" => "o"
-
-# ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
-"\u1ED5" => "o"
-
-# ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
-"\u1ED7" => "o"
-
-# ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
-"\u1ED9" => "o"
-
-# ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
-"\u1EDB" => "o"
-
-# ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
-"\u1EDD" => "o"
-
-# ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
-"\u1EDF" => "o"
-
-# ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
-"\u1EE1" => "o"
-
-# ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
-"\u1EE3" => "o"
-
-# ₒ [LATIN SUBSCRIPT SMALL LETTER O]
-"\u2092" => "o"
-
-# ⓞ [CIRCLED LATIN SMALL LETTER O]
-"\u24DE" => "o"
-
-# ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
-"\u2C7A" => "o"
-
-# ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
-"\uA74B" => "o"
-
-# ꝍ [LATIN SMALL LETTER O WITH LOOP]
-"\uA74D" => "o"
-
-# o [FULLWIDTH LATIN SMALL LETTER O]
-"\uFF4F" => "o"
-
-# Œ [LATIN CAPITAL LIGATURE OE]
-"\u0152" => "OE"
-
-# ɶ [LATIN LETTER SMALL CAPITAL OE]
-"\u0276" => "OE"
-
-# Ꝏ [LATIN CAPITAL LETTER OO]
-"\uA74E" => "OO"
-
-# Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
-"\u0222" => "OU"
-
-# ᴕ [LATIN LETTER SMALL CAPITAL OU]
-"\u1D15" => "OU"
-
-# ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
-"\u24AA" => "(o)"
-
-# œ [LATIN SMALL LIGATURE OE]
-"\u0153" => "oe"
-
-# ᴔ [LATIN SMALL LETTER TURNED OE]
-"\u1D14" => "oe"
-
-# ꝏ [LATIN SMALL LETTER OO]
-"\uA74F" => "oo"
-
-# ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
-"\u0223" => "ou"
-
-# Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
-"\u01A4" => "P"
-
-# ᴘ [LATIN LETTER SMALL CAPITAL P]
-"\u1D18" => "P"
-
-# Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
-"\u1E54" => "P"
-
-# Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
-"\u1E56" => "P"
-
-# Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
-"\u24C5" => "P"
-
-# Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
-"\u2C63" => "P"
-
-# Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
-"\uA750" => "P"
-
-# Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
-"\uA752" => "P"
-
-# Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
-"\uA754" => "P"
-
-# P [FULLWIDTH LATIN CAPITAL LETTER P]
-"\uFF30" => "P"
-
-# ƥ [LATIN SMALL LETTER P WITH HOOK]
-"\u01A5" => "p"
-
-# ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
-"\u1D71" => "p"
-
-# ᵽ [LATIN SMALL LETTER P WITH STROKE]
-"\u1D7D" => "p"
-
-# ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
-"\u1D88" => "p"
-
-# ṕ [LATIN SMALL LETTER P WITH ACUTE]
-"\u1E55" => "p"
-
-# ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
-"\u1E57" => "p"
-
-# ⓟ [CIRCLED LATIN SMALL LETTER P]
-"\u24DF" => "p"
-
-# ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
-"\uA751" => "p"
-
-# ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
-"\uA753" => "p"
-
-# ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
-"\uA755" => "p"
-
-# ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
-"\uA7FC" => "p"
-
-# p [FULLWIDTH LATIN SMALL LETTER P]
-"\uFF50" => "p"
-
-# ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
-"\u24AB" => "(p)"
-
-# Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
-"\u024A" => "Q"
-
-# Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
-"\u24C6" => "Q"
-
-# Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
-"\uA756" => "Q"
-
-# Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
-"\uA758" => "Q"
-
-# Q [FULLWIDTH LATIN CAPITAL LETTER Q]
-"\uFF31" => "Q"
-
-# ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
-"\u0138" => "q"
-
-# ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
-"\u024B" => "q"
-
-# ʠ [LATIN SMALL LETTER Q WITH HOOK]
-"\u02A0" => "q"
-
-# ⓠ [CIRCLED LATIN SMALL LETTER Q]
-"\u24E0" => "q"
-
-# ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
-"\uA757" => "q"
-
-# ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
-"\uA759" => "q"
-
-# q [FULLWIDTH LATIN SMALL LETTER Q]
-"\uFF51" => "q"
-
-# ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
-"\u24AC" => "(q)"
-
-# ȹ [LATIN SMALL LETTER QP DIGRAPH]
-"\u0239" => "qp"
-
-# Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
-"\u0154" => "R"
-
-# Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
-"\u0156" => "R"
-
-# Ř [LATIN CAPITAL LETTER R WITH CARON]
-"\u0158" => "R"
-
-# Ȓ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
-"\u0210" => "R"
-
-# Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
-"\u0212" => "R"
-
-# Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
-"\u024C" => "R"
-
-# ʀ [LATIN LETTER SMALL CAPITAL R]
-"\u0280" => "R"
-
-# ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
-"\u0281" => "R"
-
-# ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
-"\u1D19" => "R"
-
-# ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
-"\u1D1A" => "R"
-
-# Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
-"\u1E58" => "R"
-
-# Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
-"\u1E5A" => "R"
-
-# Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
-"\u1E5C" => "R"
-
-# Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
-"\u1E5E" => "R"
-
-# Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
-"\u24C7" => "R"
-
-# Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
-"\u2C64" => "R"
-
-# Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
-"\uA75A" => "R"
-
-# Ꞃ [LATIN CAPITAL LETTER INSULAR R]
-"\uA782" => "R"
-
-# R [FULLWIDTH LATIN CAPITAL LETTER R]
-"\uFF32" => "R"
-
-# ŕ [LATIN SMALL LETTER R WITH ACUTE]
-"\u0155" => "r"
-
-# ŗ [LATIN SMALL LETTER R WITH CEDILLA]
-"\u0157" => "r"
-
-# ř [LATIN SMALL LETTER R WITH CARON]
-"\u0159" => "r"
-
-# ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
-"\u0211" => "r"
-
-# ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
-"\u0213" => "r"
-
-# ɍ [LATIN SMALL LETTER R WITH STROKE]
-"\u024D" => "r"
-
-# ɼ [LATIN SMALL LETTER R WITH LONG LEG]
-"\u027C" => "r"
-
-# ɽ [LATIN SMALL LETTER R WITH TAIL]
-"\u027D" => "r"
-
-# ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
-"\u027E" => "r"
-
-# ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
-"\u027F" => "r"
-
-# ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
-"\u1D63" => "r"
-
-# ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
-"\u1D72" => "r"
-
-# ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
-"\u1D73" => "r"
-
-# ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
-"\u1D89" => "r"
-
-# ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
-"\u1E59" => "r"
-
-# ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
-"\u1E5B" => "r"
-
-# ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
-"\u1E5D" => "r"
-
-# ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
-"\u1E5F" => "r"
-
-# ⓡ [CIRCLED LATIN SMALL LETTER R]
-"\u24E1" => "r"
-
-# ꝛ [LATIN SMALL LETTER R ROTUNDA]
-"\uA75B" => "r"
-
-# ꞃ [LATIN SMALL LETTER INSULAR R]
-"\uA783" => "r"
-
-# r [FULLWIDTH LATIN SMALL LETTER R]
-"\uFF52" => "r"
-
-# ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
-"\u24AD" => "(r)"
-
-# Ś [LATIN CAPITAL LETTER S WITH ACUTE]
-"\u015A" => "S"
-
-# Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
-"\u015C" => "S"
-
-# Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
-"\u015E" => "S"
-
-# Š [LATIN CAPITAL LETTER S WITH CARON]
-"\u0160" => "S"
-
-# Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
-"\u0218" => "S"
-
-# Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
-"\u1E60" => "S"
-
-# Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
-"\u1E62" => "S"
-
-# Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
-"\u1E64" => "S"
-
-# Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
-"\u1E66" => "S"
-
-# Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
-"\u1E68" => "S"
-
-# Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
-"\u24C8" => "S"
-
-# ꜱ [LATIN LETTER SMALL CAPITAL S]
-"\uA731" => "S"
-
-# ꞅ [LATIN SMALL LETTER INSULAR S]
-"\uA785" => "S"
-
-# S [FULLWIDTH LATIN CAPITAL LETTER S]
-"\uFF33" => "S"
-
-# ś [LATIN SMALL LETTER S WITH ACUTE]
-"\u015B" => "s"
-
-# ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
-"\u015D" => "s"
-
-# ş [LATIN SMALL LETTER S WITH CEDILLA]
-"\u015F" => "s"
-
-# š [LATIN SMALL LETTER S WITH CARON]
-"\u0161" => "s"
-
-# ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
-"\u017F" => "s"
-
-# ș [LATIN SMALL LETTER S WITH COMMA BELOW]
-"\u0219" => "s"
-
-# ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
-"\u023F" => "s"
-
-# ʂ [LATIN SMALL LETTER S WITH HOOK]
-"\u0282" => "s"
-
-# ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
-"\u1D74" => "s"
-
-# ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
-"\u1D8A" => "s"
-
-# ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
-"\u1E61" => "s"
-
-# ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
-"\u1E63" => "s"
-
-# ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
-"\u1E65" => "s"
-
-# ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
-"\u1E67" => "s"
-
-# ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
-"\u1E69" => "s"
-
-# ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
-"\u1E9C" => "s"
-
-# ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
-"\u1E9D" => "s"
-
-# ⓢ [CIRCLED LATIN SMALL LETTER S]
-"\u24E2" => "s"
-
-# Ꞅ [LATIN CAPITAL LETTER INSULAR S]
-"\uA784" => "s"
-
-# s [FULLWIDTH LATIN SMALL LETTER S]
-"\uFF53" => "s"
-
-# ẞ [LATIN CAPITAL LETTER SHARP S]
-"\u1E9E" => "SS"
-
-# ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
-"\u24AE" => "(s)"
-
-# ß [LATIN SMALL LETTER SHARP S]
-"\u00DF" => "ss"
-
-# st [LATIN SMALL LIGATURE ST]
-"\uFB06" => "st"
-
-# Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
-"\u0162" => "T"
-
-# Ť [LATIN CAPITAL LETTER T WITH CARON]
-"\u0164" => "T"
-
-# Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
-"\u0166" => "T"
-
-# Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
-"\u01AC" => "T"
-
-# Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
-"\u01AE" => "T"
-
-# Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
-"\u021A" => "T"
-
-# Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
-"\u023E" => "T"
-
-# ᴛ [LATIN LETTER SMALL CAPITAL T]
-"\u1D1B" => "T"
-
-# Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
-"\u1E6A" => "T"
-
-# Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
-"\u1E6C" => "T"
-
-# Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
-"\u1E6E" => "T"
-
-# Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
-"\u1E70" => "T"
-
-# Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
-"\u24C9" => "T"
-
-# Ꞇ [LATIN CAPITAL LETTER INSULAR T]
-"\uA786" => "T"
-
-# T [FULLWIDTH LATIN CAPITAL LETTER T]
-"\uFF34" => "T"
-
-# ţ [LATIN SMALL LETTER T WITH CEDILLA]
-"\u0163" => "t"
-
-# ť [LATIN SMALL LETTER T WITH CARON]
-"\u0165" => "t"
-
-# ŧ [LATIN SMALL LETTER T WITH STROKE]
-"\u0167" => "t"
-
-# ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
-"\u01AB" => "t"
-
-# ƭ [LATIN SMALL LETTER T WITH HOOK]
-"\u01AD" => "t"
-
-# ț [LATIN SMALL LETTER T WITH COMMA BELOW]
-"\u021B" => "t"
-
-# ȶ [LATIN SMALL LETTER T WITH CURL]
-"\u0236" => "t"
-
-# ʇ [LATIN SMALL LETTER TURNED T]
-"\u0287" => "t"
-
-# ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
-"\u0288" => "t"
-
-# ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
-"\u1D75" => "t"
-
-# ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
-"\u1E6B" => "t"
-
-# ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
-"\u1E6D" => "t"
-
-# ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
-"\u1E6F" => "t"
-
-# ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
-"\u1E71" => "t"
-
-# ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
-"\u1E97" => "t"
-
-# ⓣ [CIRCLED LATIN SMALL LETTER T]
-"\u24E3" => "t"
-
-# ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
-"\u2C66" => "t"
-
-# t [FULLWIDTH LATIN SMALL LETTER T]
-"\uFF54" => "t"
-
-# Þ [LATIN CAPITAL LETTER THORN]
-"\u00DE" => "TH"
-
-# Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
-"\uA766" => "TH"
-
-# Ꜩ [LATIN CAPITAL LETTER TZ]
-"\uA728" => "TZ"
-
-# ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
-"\u24AF" => "(t)"
-
-# ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
-"\u02A8" => "tc"
-
-# þ [LATIN SMALL LETTER THORN]
-"\u00FE" => "th"
-
-# ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
-"\u1D7A" => "th"
-
-# ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
-"\uA767" => "th"
-
-# ʦ [LATIN SMALL LETTER TS DIGRAPH]
-"\u02A6" => "ts"
-
-# ꜩ [LATIN SMALL LETTER TZ]
-"\uA729" => "tz"
-
-# Ù [LATIN CAPITAL LETTER U WITH GRAVE]
-"\u00D9" => "U"
-
-# Ú [LATIN CAPITAL LETTER U WITH ACUTE]
-"\u00DA" => "U"
-
-# Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
-"\u00DB" => "U"
-
-# Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
-"\u00DC" => "U"
-
-# Ũ [LATIN CAPITAL LETTER U WITH TILDE]
-"\u0168" => "U"
-
-# Ū [LATIN CAPITAL LETTER U WITH MACRON]
-"\u016A" => "U"
-
-# Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
-"\u016C" => "U"
-
-# Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
-"\u016E" => "U"
-
-# Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
-"\u0170" => "U"
-
-# Ų [LATIN CAPITAL LETTER U WITH OGONEK]
-"\u0172" => "U"
-
-# Ư [LATIN CAPITAL LETTER U WITH HORN]
-"\u01AF" => "U"
-
-# Ǔ [LATIN CAPITAL LETTER U WITH CARON]
-"\u01D3" => "U"
-
-# Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
-"\u01D5" => "U"
-
-# Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
-"\u01D7" => "U"
-
-# Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
-"\u01D9" => "U"
-
-# Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
-"\u01DB" => "U"
-
-# Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
-"\u0214" => "U"
-
-# Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
-"\u0216" => "U"
-
-# Ʉ [LATIN CAPITAL LETTER U BAR]
-"\u0244" => "U"
-
-# ᴜ [LATIN LETTER SMALL CAPITAL U]
-"\u1D1C" => "U"
-
-# ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
-"\u1D7E" => "U"
-
-# Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
-"\u1E72" => "U"
-
-# Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
-"\u1E74" => "U"
-
-# Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
-"\u1E76" => "U"
-
-# Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
-"\u1E78" => "U"
-
-# Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
-"\u1E7A" => "U"
-
-# Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
-"\u1EE4" => "U"
-
-# Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
-"\u1EE6" => "U"
-
-# Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
-"\u1EE8" => "U"
-
-# Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
-"\u1EEA" => "U"
-
-# Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
-"\u1EEC" => "U"
-
-# Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
-"\u1EEE" => "U"
-
-# Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
-"\u1EF0" => "U"
-
-# Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
-"\u24CA" => "U"
-
-# U [FULLWIDTH LATIN CAPITAL LETTER U]
-"\uFF35" => "U"
-
-# ù [LATIN SMALL LETTER U WITH GRAVE]
-"\u00F9" => "u"
-
-# ú [LATIN SMALL LETTER U WITH ACUTE]
-"\u00FA" => "u"
-
-# û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
-"\u00FB" => "u"
-
-# ü [LATIN SMALL LETTER U WITH DIAERESIS]
-"\u00FC" => "u"
-
-# ũ [LATIN SMALL LETTER U WITH TILDE]
-"\u0169" => "u"
-
-# ū [LATIN SMALL LETTER U WITH MACRON]
-"\u016B" => "u"
-
-# ŭ [LATIN SMALL LETTER U WITH BREVE]
-"\u016D" => "u"
-
-# ů [LATIN SMALL LETTER U WITH RING ABOVE]
-"\u016F" => "u"
-
-# ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
-"\u0171" => "u"
-
-# ų [LATIN SMALL LETTER U WITH OGONEK]
-"\u0173" => "u"
-
-# ư [LATIN SMALL LETTER U WITH HORN]
-"\u01B0" => "u"
-
-# ǔ [LATIN SMALL LETTER U WITH CARON]
-"\u01D4" => "u"
-
-# ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
-"\u01D6" => "u"
-
-# ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
-"\u01D8" => "u"
-
-# ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
-"\u01DA" => "u"
-
-# ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
-"\u01DC" => "u"
-
-# ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
-"\u0215" => "u"
-
-# ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
-"\u0217" => "u"
-
-# ʉ [LATIN SMALL LETTER U BAR]
-"\u0289" => "u"
-
-# ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
-"\u1D64" => "u"
-
-# ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
-"\u1D99" => "u"
-
-# ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
-"\u1E73" => "u"
-
-# ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
-"\u1E75" => "u"
-
-# ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
-"\u1E77" => "u"
-
-# ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
-"\u1E79" => "u"
-
-# ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
-"\u1E7B" => "u"
-
-# ụ [LATIN SMALL LETTER U WITH DOT BELOW]
-"\u1EE5" => "u"
-
-# ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
-"\u1EE7" => "u"
-
-# ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
-"\u1EE9" => "u"
-
-# ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
-"\u1EEB" => "u"
-
-# ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
-"\u1EED" => "u"
-
-# ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
-"\u1EEF" => "u"
-
-# ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
-"\u1EF1" => "u"
-
-# ⓤ [CIRCLED LATIN SMALL LETTER U]
-"\u24E4" => "u"
-
-# u [FULLWIDTH LATIN SMALL LETTER U]
-"\uFF55" => "u"
-
-# ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
-"\u24B0" => "(u)"
-
-# ᵫ [LATIN SMALL LETTER UE]
-"\u1D6B" => "ue"
-
-# Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
-"\u01B2" => "V"
-
-# Ʌ [LATIN CAPITAL LETTER TURNED V]
-"\u0245" => "V"
-
-# ᴠ [LATIN LETTER SMALL CAPITAL V]
-"\u1D20" => "V"
-
-# Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
-"\u1E7C" => "V"
-
-# Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
-"\u1E7E" => "V"
-
-# Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
-"\u1EFC" => "V"
-
-# Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
-"\u24CB" => "V"
-
-# Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
-"\uA75E" => "V"
-
-# Ꝩ [LATIN CAPITAL LETTER VEND]
-"\uA768" => "V"
-
-# V [FULLWIDTH LATIN CAPITAL LETTER V]
-"\uFF36" => "V"
-
-# ʋ [LATIN SMALL LETTER V WITH HOOK]
-"\u028B" => "v"
-
-# ʌ [LATIN SMALL LETTER TURNED V]
-"\u028C" => "v"
-
-# ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
-"\u1D65" => "v"
-
-# ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
-"\u1D8C" => "v"
-
-# ṽ [LATIN SMALL LETTER V WITH TILDE]
-"\u1E7D" => "v"
-
-# ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
-"\u1E7F" => "v"
-
-# ⓥ [CIRCLED LATIN SMALL LETTER V]
-"\u24E5" => "v"
-
-# ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
-"\u2C71" => "v"
-
-# ⱴ [LATIN SMALL LETTER V WITH CURL]
-"\u2C74" => "v"
-
-# ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
-"\uA75F" => "v"
-
-# v [FULLWIDTH LATIN SMALL LETTER V]
-"\uFF56" => "v"
-
-# Ꝡ [LATIN CAPITAL LETTER VY]
-"\uA760" => "VY"
-
-# ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
-"\u24B1" => "(v)"
-
-# ꝡ [LATIN SMALL LETTER VY]
-"\uA761" => "vy"
-
-# Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
-"\u0174" => "W"
-
-# Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
-"\u01F7" => "W"
-
-# ᴡ [LATIN LETTER SMALL CAPITAL W]
-"\u1D21" => "W"
-
-# Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
-"\u1E80" => "W"
-
-# Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
-"\u1E82" => "W"
-
-# Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
-"\u1E84" => "W"
-
-# Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
-"\u1E86" => "W"
-
-# Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
-"\u1E88" => "W"
-
-# Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
-"\u24CC" => "W"
-
-# Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
-"\u2C72" => "W"
-
-# W [FULLWIDTH LATIN CAPITAL LETTER W]
-"\uFF37" => "W"
-
-# ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
-"\u0175" => "w"
-
-# ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
-"\u01BF" => "w"
-
-# ʍ [LATIN SMALL LETTER TURNED W]
-"\u028D" => "w"
-
-# ẁ [LATIN SMALL LETTER W WITH GRAVE]
-"\u1E81" => "w"
-
-# ẃ [LATIN SMALL LETTER W WITH ACUTE]
-"\u1E83" => "w"
-
-# ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
-"\u1E85" => "w"
-
-# ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
-"\u1E87" => "w"
-
-# ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
-"\u1E89" => "w"
-
-# ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
-"\u1E98" => "w"
-
-# ⓦ [CIRCLED LATIN SMALL LETTER W]
-"\u24E6" => "w"
-
-# ⱳ [LATIN SMALL LETTER W WITH HOOK]
-"\u2C73" => "w"
-
-# w [FULLWIDTH LATIN SMALL LETTER W]
-"\uFF57" => "w"
-
-# ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
-"\u24B2" => "(w)"
-
-# Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
-"\u1E8A" => "X"
-
-# Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
-"\u1E8C" => "X"
-
-# Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
-"\u24CD" => "X"
-
-# X [FULLWIDTH LATIN CAPITAL LETTER X]
-"\uFF38" => "X"
-
-# ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
-"\u1D8D" => "x"
-
-# ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
-"\u1E8B" => "x"
-
-# ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
-"\u1E8D" => "x"
-
-# ₓ [LATIN SUBSCRIPT SMALL LETTER X]
-"\u2093" => "x"
-
-# ⓧ [CIRCLED LATIN SMALL LETTER X]
-"\u24E7" => "x"
-
-# x [FULLWIDTH LATIN SMALL LETTER X]
-"\uFF58" => "x"
-
-# ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
-"\u24B3" => "(x)"
-
-# Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
-"\u00DD" => "Y"
-
-# Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
-"\u0176" => "Y"
-
-# Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
-"\u0178" => "Y"
-
-# Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
-"\u01B3" => "Y"
-
-# Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
-"\u0232" => "Y"
-
-# Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
-"\u024E" => "Y"
-
-# ʏ [LATIN LETTER SMALL CAPITAL Y]
-"\u028F" => "Y"
-
-# Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
-"\u1E8E" => "Y"
-
-# Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
-"\u1EF2" => "Y"
-
-# Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
-"\u1EF4" => "Y"
-
-# Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
-"\u1EF6" => "Y"
-
-# Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
-"\u1EF8" => "Y"
-
-# Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
-"\u1EFE" => "Y"
-
-# Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
-"\u24CE" => "Y"
-
-# Y [FULLWIDTH LATIN CAPITAL LETTER Y]
-"\uFF39" => "Y"
-
-# ý [LATIN SMALL LETTER Y WITH ACUTE]
-"\u00FD" => "y"
-
-# ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
-"\u00FF" => "y"
-
-# ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
-"\u0177" => "y"
-
-# ƴ [LATIN SMALL LETTER Y WITH HOOK]
-"\u01B4" => "y"
-
-# ȳ [LATIN SMALL LETTER Y WITH MACRON]
-"\u0233" => "y"
-
-# ɏ [LATIN SMALL LETTER Y WITH STROKE]
-"\u024F" => "y"
-
-# ʎ [LATIN SMALL LETTER TURNED Y]
-"\u028E" => "y"
-
-# ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
-"\u1E8F" => "y"
-
-# ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
-"\u1E99" => "y"
-
-# ỳ [LATIN SMALL LETTER Y WITH GRAVE]
-"\u1EF3" => "y"
-
-# ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
-"\u1EF5" => "y"
-
-# ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
-"\u1EF7" => "y"
-
-# ỹ [LATIN SMALL LETTER Y WITH TILDE]
-"\u1EF9" => "y"
-
-# ỿ [LATIN SMALL LETTER Y WITH LOOP]
-"\u1EFF" => "y"
-
-# ⓨ [CIRCLED LATIN SMALL LETTER Y]
-"\u24E8" => "y"
-
-# y [FULLWIDTH LATIN SMALL LETTER Y]
-"\uFF59" => "y"
-
-# ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
-"\u24B4" => "(y)"
-
-# Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
-"\u0179" => "Z"
-
-# Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
-"\u017B" => "Z"
-
-# Ž [LATIN CAPITAL LETTER Z WITH CARON]
-"\u017D" => "Z"
-
-# Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
-"\u01B5" => "Z"
-
-# Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
-"\u021C" => "Z"
-
-# Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
-"\u0224" => "Z"
-
-# ᴢ [LATIN LETTER SMALL CAPITAL Z]
-"\u1D22" => "Z"
-
-# Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
-"\u1E90" => "Z"
-
-# Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
-"\u1E92" => "Z"
-
-# Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
-"\u1E94" => "Z"
-
-# Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
-"\u24CF" => "Z"
-
-# Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
-"\u2C6B" => "Z"
-
-# Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
-"\uA762" => "Z"
-
-# Z [FULLWIDTH LATIN CAPITAL LETTER Z]
-"\uFF3A" => "Z"
-
-# ź [LATIN SMALL LETTER Z WITH ACUTE]
-"\u017A" => "z"
-
-# ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
-"\u017C" => "z"
-
-# ž [LATIN SMALL LETTER Z WITH CARON]
-"\u017E" => "z"
-
-# ƶ [LATIN SMALL LETTER Z WITH STROKE]
-"\u01B6" => "z"
-
-# ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
-"\u021D" => "z"
-
-# ȥ [LATIN SMALL LETTER Z WITH HOOK]
-"\u0225" => "z"
-
-# ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
-"\u0240" => "z"
-
-# ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
-"\u0290" => "z"
-
-# ʑ [LATIN SMALL LETTER Z WITH CURL]
-"\u0291" => "z"
-
-# ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
-"\u1D76" => "z"
-
-# ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
-"\u1D8E" => "z"
-
-# ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
-"\u1E91" => "z"
-
-# ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
-"\u1E93" => "z"
-
-# ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
-"\u1E95" => "z"
-
-# ⓩ [CIRCLED LATIN SMALL LETTER Z]
-"\u24E9" => "z"
-
-# ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
-"\u2C6C" => "z"
-
-# ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
-"\uA763" => "z"
-
-# z [FULLWIDTH LATIN SMALL LETTER Z]
-"\uFF5A" => "z"
-
-# ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
-"\u24B5" => "(z)"
-
-# ⁰ [SUPERSCRIPT ZERO]
-"\u2070" => "0"
-
-# ₀ [SUBSCRIPT ZERO]
-"\u2080" => "0"
-
-# ⓪ [CIRCLED DIGIT ZERO]
-"\u24EA" => "0"
-
-# ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
-"\u24FF" => "0"
-
-# 0 [FULLWIDTH DIGIT ZERO]
-"\uFF10" => "0"
-
-# ¹ [SUPERSCRIPT ONE]
-"\u00B9" => "1"
-
-# ₁ [SUBSCRIPT ONE]
-"\u2081" => "1"
-
-# ① [CIRCLED DIGIT ONE]
-"\u2460" => "1"
-
-# ⓵ [DOUBLE CIRCLED DIGIT ONE]
-"\u24F5" => "1"
-
-# ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
-"\u2776" => "1"
-
-# ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
-"\u2780" => "1"
-
-# ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
-"\u278A" => "1"
-
-# 1 [FULLWIDTH DIGIT ONE]
-"\uFF11" => "1"
-
-# ⒈ [DIGIT ONE FULL STOP]
-"\u2488" => "1."
-
-# ⑴ [PARENTHESIZED DIGIT ONE]
-"\u2474" => "(1)"
-
-# ² [SUPERSCRIPT TWO]
-"\u00B2" => "2"
-
-# ₂ [SUBSCRIPT TWO]
-"\u2082" => "2"
-
-# ② [CIRCLED DIGIT TWO]
-"\u2461" => "2"
-
-# ⓶ [DOUBLE CIRCLED DIGIT TWO]
-"\u24F6" => "2"
-
-# ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
-"\u2777" => "2"
-
-# ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
-"\u2781" => "2"
-
-# ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
-"\u278B" => "2"
-
-# 2 [FULLWIDTH DIGIT TWO]
-"\uFF12" => "2"
-
-# ⒉ [DIGIT TWO FULL STOP]
-"\u2489" => "2."
-
-# ⑵ [PARENTHESIZED DIGIT TWO]
-"\u2475" => "(2)"
-
-# ³ [SUPERSCRIPT THREE]
-"\u00B3" => "3"
-
-# ₃ [SUBSCRIPT THREE]
-"\u2083" => "3"
-
-# ③ [CIRCLED DIGIT THREE]
-"\u2462" => "3"
-
-# ⓷ [DOUBLE CIRCLED DIGIT THREE]
-"\u24F7" => "3"
-
-# ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
-"\u2778" => "3"
-
-# ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
-"\u2782" => "3"
-
-# ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
-"\u278C" => "3"
-
-# 3 [FULLWIDTH DIGIT THREE]
-"\uFF13" => "3"
-
-# ⒊ [DIGIT THREE FULL STOP]
-"\u248A" => "3."
-
-# ⑶ [PARENTHESIZED DIGIT THREE]
-"\u2476" => "(3)"
-
-# ⁴ [SUPERSCRIPT FOUR]
-"\u2074" => "4"
-
-# ₄ [SUBSCRIPT FOUR]
-"\u2084" => "4"
-
-# ④ [CIRCLED DIGIT FOUR]
-"\u2463" => "4"
-
-# ⓸ [DOUBLE CIRCLED DIGIT FOUR]
-"\u24F8" => "4"
-
-# ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
-"\u2779" => "4"
-
-# ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
-"\u2783" => "4"
-
-# ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
-"\u278D" => "4"
-
-# 4 [FULLWIDTH DIGIT FOUR]
-"\uFF14" => "4"
-
-# ⒋ [DIGIT FOUR FULL STOP]
-"\u248B" => "4."
-
-# ⑷ [PARENTHESIZED DIGIT FOUR]
-"\u2477" => "(4)"
-
-# ⁵ [SUPERSCRIPT FIVE]
-"\u2075" => "5"
-
-# ₅ [SUBSCRIPT FIVE]
-"\u2085" => "5"
-
-# ⑤ [CIRCLED DIGIT FIVE]
-"\u2464" => "5"
-
-# ⓹ [DOUBLE CIRCLED DIGIT FIVE]
-"\u24F9" => "5"
-
-# ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
-"\u277A" => "5"
-
-# ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
-"\u2784" => "5"
-
-# ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
-"\u278E" => "5"
-
-# 5 [FULLWIDTH DIGIT FIVE]
-"\uFF15" => "5"
-
-# ⒌ [DIGIT FIVE FULL STOP]
-"\u248C" => "5."
-
-# ⑸ [PARENTHESIZED DIGIT FIVE]
-"\u2478" => "(5)"
-
-# ⁶ [SUPERSCRIPT SIX]
-"\u2076" => "6"
-
-# ₆ [SUBSCRIPT SIX]
-"\u2086" => "6"
-
-# ⑥ [CIRCLED DIGIT SIX]
-"\u2465" => "6"
-
-# ⓺ [DOUBLE CIRCLED DIGIT SIX]
-"\u24FA" => "6"
-
-# ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
-"\u277B" => "6"
-
-# ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
-"\u2785" => "6"
-
-# ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
-"\u278F" => "6"
-
-# 6 [FULLWIDTH DIGIT SIX]
-"\uFF16" => "6"
-
-# ⒍ [DIGIT SIX FULL STOP]
-"\u248D" => "6."
-
-# ⑹ [PARENTHESIZED DIGIT SIX]
-"\u2479" => "(6)"
-
-# ⁷ [SUPERSCRIPT SEVEN]
-"\u2077" => "7"
-
-# ₇ [SUBSCRIPT SEVEN]
-"\u2087" => "7"
-
-# ⑦ [CIRCLED DIGIT SEVEN]
-"\u2466" => "7"
-
-# ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
-"\u24FB" => "7"
-
-# ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
-"\u277C" => "7"
-
-# ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
-"\u2786" => "7"
-
-# ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
-"\u2790" => "7"
-
-# 7 [FULLWIDTH DIGIT SEVEN]
-"\uFF17" => "7"
-
-# ⒎ [DIGIT SEVEN FULL STOP]
-"\u248E" => "7."
-
-# ⑺ [PARENTHESIZED DIGIT SEVEN]
-"\u247A" => "(7)"
-
-# ⁸ [SUPERSCRIPT EIGHT]
-"\u2078" => "8"
-
-# ₈ [SUBSCRIPT EIGHT]
-"\u2088" => "8"
-
-# ⑧ [CIRCLED DIGIT EIGHT]
-"\u2467" => "8"
-
-# ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
-"\u24FC" => "8"
-
-# ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
-"\u277D" => "8"
-
-# ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
-"\u2787" => "8"
-
-# ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
-"\u2791" => "8"
-
-# 8 [FULLWIDTH DIGIT EIGHT]
-"\uFF18" => "8"
-
-# ⒏ [DIGIT EIGHT FULL STOP]
-"\u248F" => "8."
-
-# ⑻ [PARENTHESIZED DIGIT EIGHT]
-"\u247B" => "(8)"
-
-# ⁹ [SUPERSCRIPT NINE]
-"\u2079" => "9"
-
-# ₉ [SUBSCRIPT NINE]
-"\u2089" => "9"
-
-# ⑨ [CIRCLED DIGIT NINE]
-"\u2468" => "9"
-
-# ⓽ [DOUBLE CIRCLED DIGIT NINE]
-"\u24FD" => "9"
-
-# ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
-"\u277E" => "9"
-
-# ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
-"\u2788" => "9"
-
-# ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
-"\u2792" => "9"
-
-# 9 [FULLWIDTH DIGIT NINE]
-"\uFF19" => "9"
-
-# ⒐ [DIGIT NINE FULL STOP]
-"\u2490" => "9."
-
-# ⑼ [PARENTHESIZED DIGIT NINE]
-"\u247C" => "(9)"
-
-# ⑩ [CIRCLED NUMBER TEN]
-"\u2469" => "10"
-
-# ⓾ [DOUBLE CIRCLED NUMBER TEN]
-"\u24FE" => "10"
-
-# ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
-"\u277F" => "10"
-
-# ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
-"\u2789" => "10"
-
-# ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
-"\u2793" => "10"
-
-# ⒑ [NUMBER TEN FULL STOP]
-"\u2491" => "10."
-
-# ⑽ [PARENTHESIZED NUMBER TEN]
-"\u247D" => "(10)"
-
-# ⑪ [CIRCLED NUMBER ELEVEN]
-"\u246A" => "11"
-
-# ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
-"\u24EB" => "11"
-
-# ⒒ [NUMBER ELEVEN FULL STOP]
-"\u2492" => "11."
-
-# ⑾ [PARENTHESIZED NUMBER ELEVEN]
-"\u247E" => "(11)"
-
-# ⑫ [CIRCLED NUMBER TWELVE]
-"\u246B" => "12"
-
-# ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
-"\u24EC" => "12"
-
-# ⒓ [NUMBER TWELVE FULL STOP]
-"\u2493" => "12."
-
-# ⑿ [PARENTHESIZED NUMBER TWELVE]
-"\u247F" => "(12)"
-
-# ⑬ [CIRCLED NUMBER THIRTEEN]
-"\u246C" => "13"
-
-# ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
-"\u24ED" => "13"
-
-# ⒔ [NUMBER THIRTEEN FULL STOP]
-"\u2494" => "13."
-
-# ⒀ [PARENTHESIZED NUMBER THIRTEEN]
-"\u2480" => "(13)"
-
-# ⑭ [CIRCLED NUMBER FOURTEEN]
-"\u246D" => "14"
-
-# ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
-"\u24EE" => "14"
-
-# ⒕ [NUMBER FOURTEEN FULL STOP]
-"\u2495" => "14."
-
-# ⒁ [PARENTHESIZED NUMBER FOURTEEN]
-"\u2481" => "(14)"
-
-# ⑮ [CIRCLED NUMBER FIFTEEN]
-"\u246E" => "15"
-
-# ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
-"\u24EF" => "15"
-
-# ⒖ [NUMBER FIFTEEN FULL STOP]
-"\u2496" => "15."
-
-# ⒂ [PARENTHESIZED NUMBER FIFTEEN]
-"\u2482" => "(15)"
-
-# ⑯ [CIRCLED NUMBER SIXTEEN]
-"\u246F" => "16"
-
-# ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
-"\u24F0" => "16"
-
-# ⒗ [NUMBER SIXTEEN FULL STOP]
-"\u2497" => "16."
-
-# ⒃ [PARENTHESIZED NUMBER SIXTEEN]
-"\u2483" => "(16)"
-
-# ⑰ [CIRCLED NUMBER SEVENTEEN]
-"\u2470" => "17"
-
-# ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
-"\u24F1" => "17"
-
-# ⒘ [NUMBER SEVENTEEN FULL STOP]
-"\u2498" => "17."
-
-# ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
-"\u2484" => "(17)"
-
-# ⑱ [CIRCLED NUMBER EIGHTEEN]
-"\u2471" => "18"
-
-# ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
-"\u24F2" => "18"
-
-# ⒙ [NUMBER EIGHTEEN FULL STOP]
-"\u2499" => "18."
-
-# ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
-"\u2485" => "(18)"
-
-# ⑲ [CIRCLED NUMBER NINETEEN]
-"\u2472" => "19"
-
-# ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
-"\u24F3" => "19"
-
-# ⒚ [NUMBER NINETEEN FULL STOP]
-"\u249A" => "19."
-
-# ⒆ [PARENTHESIZED NUMBER NINETEEN]
-"\u2486" => "(19)"
-
-# ⑳ [CIRCLED NUMBER TWENTY]
-"\u2473" => "20"
-
-# ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
-"\u24F4" => "20"
-
-# ⒛ [NUMBER TWENTY FULL STOP]
-"\u249B" => "20."
-
-# ⒇ [PARENTHESIZED NUMBER TWENTY]
-"\u2487" => "(20)"
-
-# « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
-"\u00AB" => "\""
-
-# » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
-"\u00BB" => "\""
-
-# “ [LEFT DOUBLE QUOTATION MARK]
-"\u201C" => "\""
-
-# ” [RIGHT DOUBLE QUOTATION MARK]
-"\u201D" => "\""
-
-# „ [DOUBLE LOW-9 QUOTATION MARK]
-"\u201E" => "\""
-
-# ″ [DOUBLE PRIME]
-"\u2033" => "\""
-
-# ‶ [REVERSED DOUBLE PRIME]
-"\u2036" => "\""
-
-# ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
-"\u275D" => "\""
-
-# ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
-"\u275E" => "\""
-
-# ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
-"\u276E" => "\""
-
-# ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
-"\u276F" => "\""
-
-# " [FULLWIDTH QUOTATION MARK]
-"\uFF02" => "\""
-
-# ‘ [LEFT SINGLE QUOTATION MARK]
-"\u2018" => "\'"
-
-# ’ [RIGHT SINGLE QUOTATION MARK]
-"\u2019" => "\'"
-
-# ‚ [SINGLE LOW-9 QUOTATION MARK]
-"\u201A" => "\'"
-
-# ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
-"\u201B" => "\'"
-
-# ′ [PRIME]
-"\u2032" => "\'"
-
-# ‵ [REVERSED PRIME]
-"\u2035" => "\'"
-
-# ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
-"\u2039" => "\'"
-
-# › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
-"\u203A" => "\'"
-
-# ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
-"\u275B" => "\'"
-
-# ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
-"\u275C" => "\'"
-
-# ' [FULLWIDTH APOSTROPHE]
-"\uFF07" => "\'"
-
-# ‐ [HYPHEN]
-"\u2010" => "-"
-
-# ‑ [NON-BREAKING HYPHEN]
-"\u2011" => "-"
-
-# ‒ [FIGURE DASH]
-"\u2012" => "-"
-
-# – [EN DASH]
-"\u2013" => "-"
-
-# — [EM DASH]
-"\u2014" => "-"
-
-# ⁻ [SUPERSCRIPT MINUS]
-"\u207B" => "-"
-
-# ₋ [SUBSCRIPT MINUS]
-"\u208B" => "-"
-
-# - [FULLWIDTH HYPHEN-MINUS]
-"\uFF0D" => "-"
-
-# ⁅ [LEFT SQUARE BRACKET WITH QUILL]
-"\u2045" => "["
-
-# ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
-"\u2772" => "["
-
-# [ [FULLWIDTH LEFT SQUARE BRACKET]
-"\uFF3B" => "["
-
-# ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
-"\u2046" => "]"
-
-# ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
-"\u2773" => "]"
-
-# ] [FULLWIDTH RIGHT SQUARE BRACKET]
-"\uFF3D" => "]"
-
-# ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
-"\u207D" => "("
-
-# ₍ [SUBSCRIPT LEFT PARENTHESIS]
-"\u208D" => "("
-
-# ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
-"\u2768" => "("
-
-# ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
-"\u276A" => "("
-
-# ( [FULLWIDTH LEFT PARENTHESIS]
-"\uFF08" => "("
-
-# ⸨ [LEFT DOUBLE PARENTHESIS]
-"\u2E28" => "(("
-
-# ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
-"\u207E" => ")"
-
-# ₎ [SUBSCRIPT RIGHT PARENTHESIS]
-"\u208E" => ")"
-
-# ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
-"\u2769" => ")"
-
-# ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
-"\u276B" => ")"
-
-# ) [FULLWIDTH RIGHT PARENTHESIS]
-"\uFF09" => ")"
-
-# ⸩ [RIGHT DOUBLE PARENTHESIS]
-"\u2E29" => "))"
-
-# ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
-"\u276C" => "<"
-
-# ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
-"\u2770" => "<"
-
-# < [FULLWIDTH LESS-THAN SIGN]
-"\uFF1C" => "<"
-
-# ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
-"\u276D" => ">"
-
-# ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
-"\u2771" => ">"
-
-# > [FULLWIDTH GREATER-THAN SIGN]
-"\uFF1E" => ">"
-
-# ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
-"\u2774" => "{"
-
-# { [FULLWIDTH LEFT CURLY BRACKET]
-"\uFF5B" => "{"
-
-# ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
-"\u2775" => "}"
-
-# } [FULLWIDTH RIGHT CURLY BRACKET]
-"\uFF5D" => "}"
-
-# ⁺ [SUPERSCRIPT PLUS SIGN]
-"\u207A" => "+"
-
-# ₊ [SUBSCRIPT PLUS SIGN]
-"\u208A" => "+"
-
-# + [FULLWIDTH PLUS SIGN]
-"\uFF0B" => "+"
-
-# ⁼ [SUPERSCRIPT EQUALS SIGN]
-"\u207C" => "="
-
-# ₌ [SUBSCRIPT EQUALS SIGN]
-"\u208C" => "="
-
-# = [FULLWIDTH EQUALS SIGN]
-"\uFF1D" => "="
-
-# ! [FULLWIDTH EXCLAMATION MARK]
-"\uFF01" => "!"
-
-# ‼ [DOUBLE EXCLAMATION MARK]
-"\u203C" => "!!"
-
-# ⁉ [EXCLAMATION QUESTION MARK]
-"\u2049" => "!?"
-
-# # [FULLWIDTH NUMBER SIGN]
-"\uFF03" => "#"
-
-# $ [FULLWIDTH DOLLAR SIGN]
-"\uFF04" => "$"
-
-# ⁒ [COMMERCIAL MINUS SIGN]
-"\u2052" => "%"
-
-# % [FULLWIDTH PERCENT SIGN]
-"\uFF05" => "%"
-
-# & [FULLWIDTH AMPERSAND]
-"\uFF06" => "&"
-
-# ⁎ [LOW ASTERISK]
-"\u204E" => "*"
-
-# * [FULLWIDTH ASTERISK]
-"\uFF0A" => "*"
-
-# , [FULLWIDTH COMMA]
-"\uFF0C" => ","
-
-# . [FULLWIDTH FULL STOP]
-"\uFF0E" => "."
-
-# ⁄ [FRACTION SLASH]
-"\u2044" => "/"
-
-# / [FULLWIDTH SOLIDUS]
-"\uFF0F" => "/"
-
-# : [FULLWIDTH COLON]
-"\uFF1A" => ":"
-
-# ⁏ [REVERSED SEMICOLON]
-"\u204F" => ";"
-
-# ; [FULLWIDTH SEMICOLON]
-"\uFF1B" => ";"
-
-# ? [FULLWIDTH QUESTION MARK]
-"\uFF1F" => "?"
-
-# ⁇ [DOUBLE QUESTION MARK]
-"\u2047" => "??"
-
-# ⁈ [QUESTION EXCLAMATION MARK]
-"\u2048" => "?!"
-
-# @ [FULLWIDTH COMMERCIAL AT]
-"\uFF20" => "@"
-
-# \ [FULLWIDTH REVERSE SOLIDUS]
-"\uFF3C" => "\\"
-
-# ‸ [CARET]
-"\u2038" => "^"
-
-# ^ [FULLWIDTH CIRCUMFLEX ACCENT]
-"\uFF3E" => "^"
-
-# _ [FULLWIDTH LOW LINE]
-"\uFF3F" => "_"
-
-# ⁓ [SWUNG DASH]
-"\u2053" => "~"
-
-# ~ [FULLWIDTH TILDE]
-"\uFF5E" => "~"
-
-################################################################
-# Below is the Perl script used to generate the above mappings #
-# from ASCIIFoldingFilter.java: #
-################################################################
-#
-# #!/usr/bin/perl
-#
-# use warnings;
-# use strict;
-#
-# my @source_chars = ();
-# my @source_char_descriptions = ();
-# my $target = '';
-#
-# while (<>) {
-# if (/case\s+'(\\u[A-F0-9]+)':\s*\/\/\s*(.*)/i) {
-# push @source_chars, $1;
-# push @source_char_descriptions, $2;
-# next;
-# }
-# if (/output\[[^\]]+\]\s*=\s*'(\\'|\\\\|.)'/) {
-# $target .= $1;
-# next;
-# }
-# if (/break;/) {
-# $target = "\\\"" if ($target eq '"');
-# for my $source_char_num (0..$#source_chars) {
-# print "# $source_char_descriptions[$source_char_num]\n";
-# print "\"$source_chars[$source_char_num]\" => \"$target\"\n\n";
-# }
-# @source_chars = ();
-# @source_char_descriptions = ();
-# $target = '';
-# }
-# }
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/mapping-ISOLatin1Accent.txt b/src/test/resources/solr-home-4.1/collection1/conf/mapping-ISOLatin1Accent.txt
deleted file mode 100644
index c441043..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/mapping-ISOLatin1Accent.txt
+++ /dev/null
@@ -1,246 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Syntax:
-# "source" => "target"
-# "source".length() > 0 (source cannot be empty.)
-# "target".length() >= 0 (target can be empty.)
-
-# example:
-# "À" => "A"
-# "\u00C0" => "A"
-# "\u00C0" => "\u0041"
-# "ß" => "ss"
-# "\t" => " "
-# "\n" => ""
-
-# À => A
-"\u00C0" => "A"
-
-# Á => A
-"\u00C1" => "A"
-
-# Â => A
-"\u00C2" => "A"
-
-# Ã => A
-"\u00C3" => "A"
-
-# Ä => A
-"\u00C4" => "A"
-
-# Å => A
-"\u00C5" => "A"
-
-# Æ => AE
-"\u00C6" => "AE"
-
-# Ç => C
-"\u00C7" => "C"
-
-# È => E
-"\u00C8" => "E"
-
-# É => E
-"\u00C9" => "E"
-
-# Ê => E
-"\u00CA" => "E"
-
-# Ë => E
-"\u00CB" => "E"
-
-# Ì => I
-"\u00CC" => "I"
-
-# Í => I
-"\u00CD" => "I"
-
-# Î => I
-"\u00CE" => "I"
-
-# Ï => I
-"\u00CF" => "I"
-
-# IJ => IJ
-"\u0132" => "IJ"
-
-# Ð => D
-"\u00D0" => "D"
-
-# Ñ => N
-"\u00D1" => "N"
-
-# Ò => O
-"\u00D2" => "O"
-
-# Ó => O
-"\u00D3" => "O"
-
-# Ô => O
-"\u00D4" => "O"
-
-# Õ => O
-"\u00D5" => "O"
-
-# Ö => O
-"\u00D6" => "O"
-
-# Ø => O
-"\u00D8" => "O"
-
-# Œ => OE
-"\u0152" => "OE"
-
-# Þ
-"\u00DE" => "TH"
-
-# Ù => U
-"\u00D9" => "U"
-
-# Ú => U
-"\u00DA" => "U"
-
-# Û => U
-"\u00DB" => "U"
-
-# Ü => U
-"\u00DC" => "U"
-
-# Ý => Y
-"\u00DD" => "Y"
-
-# Ÿ => Y
-"\u0178" => "Y"
-
-# à => a
-"\u00E0" => "a"
-
-# á => a
-"\u00E1" => "a"
-
-# â => a
-"\u00E2" => "a"
-
-# ã => a
-"\u00E3" => "a"
-
-# ä => a
-"\u00E4" => "a"
-
-# å => a
-"\u00E5" => "a"
-
-# æ => ae
-"\u00E6" => "ae"
-
-# ç => c
-"\u00E7" => "c"
-
-# è => e
-"\u00E8" => "e"
-
-# é => e
-"\u00E9" => "e"
-
-# ê => e
-"\u00EA" => "e"
-
-# ë => e
-"\u00EB" => "e"
-
-# ì => i
-"\u00EC" => "i"
-
-# í => i
-"\u00ED" => "i"
-
-# î => i
-"\u00EE" => "i"
-
-# ï => i
-"\u00EF" => "i"
-
-# ij => ij
-"\u0133" => "ij"
-
-# ð => d
-"\u00F0" => "d"
-
-# ñ => n
-"\u00F1" => "n"
-
-# ò => o
-"\u00F2" => "o"
-
-# ó => o
-"\u00F3" => "o"
-
-# ô => o
-"\u00F4" => "o"
-
-# õ => o
-"\u00F5" => "o"
-
-# ö => o
-"\u00F6" => "o"
-
-# ø => o
-"\u00F8" => "o"
-
-# œ => oe
-"\u0153" => "oe"
-
-# ß => ss
-"\u00DF" => "ss"
-
-# þ => th
-"\u00FE" => "th"
-
-# ù => u
-"\u00F9" => "u"
-
-# ú => u
-"\u00FA" => "u"
-
-# û => u
-"\u00FB" => "u"
-
-# ü => u
-"\u00FC" => "u"
-
-# ý => y
-"\u00FD" => "y"
-
-# ÿ => y
-"\u00FF" => "y"
-
-# ff => ff
-"\uFB00" => "ff"
-
-# fi => fi
-"\uFB01" => "fi"
-
-# fl => fl
-"\uFB02" => "fl"
-
-# ffi => ffi
-"\uFB03" => "ffi"
-
-# ffl => ffl
-"\uFB04" => "ffl"
-
-# ſt => ft
-"\uFB05" => "ft"
-
-# st => st
-"\uFB06" => "st"
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/protwords.txt b/src/test/resources/solr-home-4.1/collection1/conf/protwords.txt
deleted file mode 100644
index 5a32e50..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/protwords.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-#-----------------------------------------------------------------------
-# Use a protected word file to protect against the stemmer reducing two
-# unrelated words to the same base word.
-
-# Some non-words that normally won't be encountered,
-# just to test that they won't be stemmed.
-dontstems
-zwhacky
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/schema.xml b/src/test/resources/solr-home-4.1/collection1/conf/schema.xml
deleted file mode 100644
index e1503e4..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/schema.xml
+++ /dev/null
@@ -1,1108 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- id
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/scripts.conf b/src/test/resources/solr-home-4.1/collection1/conf/scripts.conf
deleted file mode 100644
index f58b262..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/scripts.conf
+++ /dev/null
@@ -1,24 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-user=
-solr_hostname=localhost
-solr_port=8983
-rsyncd_port=18983
-data_dir=
-webapp_name=solr
-master_host=
-master_data_dir=
-master_status_dir=
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/solrconfig.xml b/src/test/resources/solr-home-4.1/collection1/conf/solrconfig.xml
deleted file mode 100644
index af08132..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/solrconfig.xml
+++ /dev/null
@@ -1,1785 +0,0 @@
-
-
-
-
-
-
-
-
- LUCENE_41
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.data.dir:}
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ${solr.ulog.dir:}
-
-
-
-
- 15000
- false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 1024
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- true
-
-
-
-
-
- 20
-
-
- 200
-
-
-
-
-
-
-
-
-
-
-
- static firstSearcher warming in solrconfig.xml
-
-
-
-
-
- false
-
-
- 2
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- 10
- text
-
-
-
-
-
-
-
-
-
-
-
-
-
- explicit
- json
- true
- text
-
-
-
-
-
-
-
- true
- json
- true
-
-
-
-
-
-
-
- explicit
-
-
- velocity
- browse
- layout
- Solritas
-
-
- edismax
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
-
- text
- 100%
- *:*
- 10
- *,score
-
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
- title^10.0 description^5.0 keywords^5.0 author^2.0 resourcename^1.0
-
- text,features,name,sku,id,manu,cat,title,description,keywords,author,resourcename
- 3
-
-
- on
- cat
- manu_exact
- content_type
- author_s
- ipod
- GB
- 1
- cat,inStock
- after
- price
- 0
- 600
- 50
- popularity
- 0
- 10
- 3
- manufacturedate_dt
- NOW/YEAR-10YEARS
- NOW
- +1YEAR
- before
- after
-
-
- on
- content features title name
- html
- <b>
- </b>
- 0
- title
- 0
- name
- 3
- 200
- content
- 750
-
-
- on
- false
- 5
- 2
- 5
- true
- true
- 5
- 3
-
-
-
-
- spellcheck
-
-
-
-
-
-
-
-
-
-
-
-
-
- application/json
-
-
-
-
- application/csv
-
-
-
-
-
-
- true
- ignored_
-
-
- true
- links
- ignored_
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- solrpingquery
-
-
- all
-
-
-
-
-
-
-
-
- explicit
- true
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- textSpell
-
-
-
-
-
- default
- name
- solr.DirectSolrSpellChecker
-
- internal
-
- 0.5
-
- 2
-
- 1
-
- 5
-
- 4
-
- 0.01
-
-
-
-
-
- wordbreak
- solr.WordBreakSolrSpellChecker
- name
- true
- true
- 10
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text
-
- default
- wordbreak
- on
- true
- 10
- 5
- 5
- true
- true
- 10
- 5
-
-
- spellcheck
-
-
-
-
-
-
-
-
-
- text
- true
-
-
- tvComponent
-
-
-
-
-
-
-
-
- default
-
-
- org.carrot2.clustering.lingo.LingoClusteringAlgorithm
-
-
- 20
-
-
- clustering/carrot2
-
-
- ENGLISH
-
-
- stc
- org.carrot2.clustering.stc.STCClusteringAlgorithm
-
-
-
-
-
-
- true
- default
- true
-
- name
- id
-
- features
-
- true
-
-
-
- false
-
- edismax
-
- text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4
-
- *:*
- 10
- *,score
-
-
- clustering
-
-
-
-
-
-
-
-
-
- true
- false
-
-
- terms
-
-
-
-
-
-
-
- string
- elevate.xml
-
-
-
-
-
- explicit
- text
-
-
- elevator
-
-
-
-
-
-
-
-
-
-
- 100
-
-
-
-
-
-
-
- 70
-
- 0.5
-
- [-\w ,/\n\"']{20,200}
-
-
-
-
-
-
- ]]>
- ]]>
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- ,,
- ,,
- ,,
- ,,
- ,]]>
- ]]>
-
-
-
-
-
- 10
- .,!?
-
-
-
-
-
-
- WORD
-
-
- en
- US
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- text/plain; charset=UTF-8
-
-
-
-
-
-
-
-
- 5
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- *:*
-
-
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/spellings.txt b/src/test/resources/solr-home-4.1/collection1/conf/spellings.txt
deleted file mode 100644
index 765190a..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/spellings.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-pizza
-history
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/update-script.js b/src/test/resources/solr-home-4.1/collection1/conf/update-script.js
deleted file mode 100644
index 272cbfa..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/update-script.js
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- This is a basic skeleton JavaScript update processor.
-
- In order for this to be executed, it must be properly wired into solrconfig.xml; by default it is commented out in
- the example solrconfig.xml and must be uncommented to be enabled.
-
- See http://wiki.apache.org/solr/ScriptUpdateProcessor for more details.
-*/
-
-function processAdd(cmd) {
-
- doc = cmd.solrDoc; // org.apache.solr.common.SolrInputDocument
- id = doc.getFieldValue("id");
- logger.info("update-script#processAdd: id=" + id);
-
-// Set a field value:
-// doc.setField("foo_s", "whatever");
-
-// Get a configuration parameter:
-// config_param = params.get('config_param'); // "params" only exists if processor configured with
-
-// Get a request parameter:
-// some_param = req.getParams().get("some_param")
-
-// Add a field of field names that match a pattern:
-// - Potentially useful to determine the fields/attributes represented in a result set, via faceting on field_name_ss
-// field_names = doc.getFieldNames().toArray();
-// for(i=0; i < field_names.length; i++) {
-// field_name = field_names[i];
-// if (/attr_.*/.test(field_name)) { doc.addField("attribute_ss", field_names[i]); }
-// }
-
-}
-
-function processDelete(cmd) {
- // no-op
-}
-
-function processMergeIndexes(cmd) {
- // no-op
-}
-
-function processCommit(cmd) {
- // no-op
-}
-
-function processRollback(cmd) {
- // no-op
-}
-
-function finish() {
- // no-op
-}
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/velocity/VM_global_library.vm b/src/test/resources/solr-home-4.1/collection1/conf/velocity/VM_global_library.vm
deleted file mode 100644
index f11aebd..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/velocity/VM_global_library.vm
+++ /dev/null
@@ -1,168 +0,0 @@
-
-#macro(param $key)$request.params.get($key)#end
-
-#macro(url_root)/solr#end
-
-## TODO: s/url_for_solr/url_for_core/ and s/url_root/url_for_solr/
-#macro(core_name)$request.core.name#end
-#macro(url_for_solr)#{url_root}#if($request.core.name != "")/$request.core.name#end#end
-#macro(url_for_home)#url_for_solr/browse#end
-
-#macro(q)&q=$!{esc.url($params.get('q'))}#end
-
-#macro(fqs $p)#foreach($fq in $p)#if($velocityCount>1){end}fq=$esc.url($fq)#end#end
-
-#macro(debug)#if($request.params.get('debugQuery'))&debugQuery=true#end#end
-
-#macro(boostPrice)#if($request.params.get('bf') == 'price')&bf=price#end#end
-
-#macro(annotate)#if($request.params.get('annotateBrowse'))&annotateBrowse=true#end#end
-
-#macro(annTitle $msg)#if($annotate == true)title="$msg"#end#end
-
-#macro(spatial)#if($request.params.get('sfield'))&sfield=store#end#if($request.params.get('pt'))&pt=$request.params.get('pt')#end#if($request.params.get('d'))&d=$request.params.get('d')#end#end
-
-#macro(qOpts)#set($queryOpts = $request.params.get("queryOpts"))#if($queryOpts && $queryOpts != "")&queryOpts=$queryOpts#end#end
-
-#macro(group)#if($request.params.getBool("group") == true)&group=true#end#if($request.params.get("group.field"))#foreach($grp in $request.params.getParams('group.field'))&group.field=$grp#end#end#end
-
-#macro(lensNoQ)?#if($request.params.getParams('fq') and $list.size($request.params.getParams('fq')) > 0)fqs($request.params.getParams('fq'))#end#debug#boostPrice#annotate#spatial#qOpts#group#end
-#macro(lens)#lensNoQ#q#end
-
-
-#macro(url_for_lens)#{url_for_home}#lens#end
-
-#macro(url_for_start $start)#url_for_home#lens&start=$start#end
-
-#macro(url_for_filters $p)#url_for_home?#q#boostPrice#spatial#qOpts#if($list.size($p) > 0)fqs($p)#end#debug#end
-
-#macro(url_for_nested_facet_query $field)#url_for_home#lens&fq=$esc.url($field)#end
-
-## TODO: convert to use {!raw f=$field}$value (with escaping of course)
-#macro(url_for_facet_filter $field $value)#url_for_home#lens&fq=$esc.url($field):%22$esc.url($value)%22#end
-
-#macro(url_for_facet_date_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end
-
-#macro(url_for_facet_range_filter $field $value)#url_for_home#lens&fq=$esc.url($field):$esc.url($value)#end
-
-
-#macro(link_to_previous_page $text)
- #if($page.current_page_number > 1)
- #set($prev_start = $page.start - $page.results_per_page)
- $text
- #end
-#end
-
-#macro(link_to_next_page $text)
- #if($page.current_page_number < $page.page_count)
- #set($next_start = $page.start + $page.results_per_page)
- $text
- #end
-#end
-
-#macro(link_to_page $page_number $text)
- #if($page_number == $page.current_page_number)
- $text
- #else
- #if($page_number <= $page.page_count)
- #set($page_start = $page_number * $page.results_per_page - $page.results_per_page)
- $text
- #end
- #end
-#end
-
-#macro(display_facet_query $field, $display, $fieldName)
- #if($field.size() > 0)
- $display
-
- #if($response.response.get('grouped'))
- $response.response.get('grouped').size() group(s) found in ${response.responseHeader.QTime} ms
- #else$page.results_found results found in ${response.responseHeader.QTime} ms
- Page $page.current_page_number of $page.page_count#end
-
Disclaimer: The locations displayed in this demonstration are purely fictional. It is more than likely that no store with the items listed actually exists at that location!
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/velocity/head.vm b/src/test/resources/solr-home-4.1/collection1/conf/velocity/head.vm
deleted file mode 100644
index 4fcdb4e..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/velocity/head.vm
+++ /dev/null
@@ -1,28 +0,0 @@
-
- ## An example of using an arbitrary request parameter
- #param('title')
-
-
-
-
-
-
-
-
-
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/velocity/header.vm b/src/test/resources/solr-home-4.1/collection1/conf/velocity/header.vm
deleted file mode 100644
index 8037e89..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/velocity/header.vm
+++ /dev/null
@@ -1,3 +0,0 @@
-
-
-#end
diff --git a/src/test/resources/solr-home-4.1/collection1/conf/velocity/richtext-doc.vm b/src/test/resources/solr-home-4.1/collection1/conf/velocity/richtext-doc.vm
deleted file mode 100644
index 30b7d5e..0000000
--- a/src/test/resources/solr-home-4.1/collection1/conf/velocity/richtext-doc.vm
+++ /dev/null
@@ -1,114 +0,0 @@
-## Mimetype to extension map for detecting file type and show icon
-## List of types match the icons in /solr/img/filetypes
-#set($extMap = {"application/x-7z-compressed": "7z",
- "application/postscript": "ai",
- "application/pgp-signature": "asc",
- "application/octet-stream": "bin",
- "application/x-bzip2": "bz2",
- "text/x-c": "c",
- "application/vnd.ms-htmlhelp": "chm",
- "application/java-vm": "class",
- "text/css": "css",
- "text/csv": "csv",
- "application/x-debian-package": "deb",
- "application/msword": "doc",
- "message/rfc822": "eml",
- "image/gif": "gif",
- "application/winhlp": "hlp",
- "text/html": "html",
- "application/java-archive": "jar",
- "text/x-java-source": "java",
- "image/jpeg": "jpeg",
- "application/javascript": "js",
- "application/vnd.oasis.opendocument.chart": "odc",
- "application/vnd.oasis.opendocument.formula": "odf",
- "application/vnd.oasis.opendocument.graphics": "odg",
- "application/vnd.oasis.opendocument.image": "odi",
- "application/vnd.oasis.opendocument.presentation": "odp",
- "application/vnd.oasis.opendocument.spreadsheet": "ods",
- "application/vnd.oasis.opendocument.text": "odt",
- "application/pdf": "pdf",
- "application/pgp-encrypted": "pgp",
- "image/png": "png",
- "application/vnd.ms-powerpoint": "ppt",
- "audio/x-pn-realaudio": "ram",
- "application/x-rar-compressed": "rar",
- "application/vnd.rn-realmedia": "rm",
- "application/rtf": "rtf",
- "application/x-shockwave-flash": "swf",
- "application/vnd.sun.xml.calc": "sxc",
- "application/vnd.sun.xml.draw": "sxd",
- "application/vnd.sun.xml.impress": "sxi",
- "application/vnd.sun.xml.writer": "sxw",
- "application/x-tar": "tar",
- "application/x-tex": "tex",
- "text/plain": "txt",
- "text/x-vcard": "vcf",
- "application/vnd.visio": "vsd",
- "audio/x-wav": "wav",
- "audio/x-ms-wma": "wma",
- "video/x-ms-wmv": "wmv",
- "application/vnd.ms-excel": "xls",
- "application/xml": "xml",
- "application/x-xpinstall": "xpi",
- "application/zip": "zip"})
-
-
-#if($doc.getFieldValue('title'))
- #set($title = $esc.html($doc.getFirstValue('title')))
-#else
- #set($title = "["+$doc.getFieldValue('id')+"]")
-#end
-#if($doc.getFieldValue('url'))
- #set($url = $doc.getFieldValue('url'))
-#elseif($doc.getFieldValue('resourcename'))
- #set($url = "file:///$doc.getFieldValue('resourcename')")
-#else
- #set($url = "$doc.getFieldValue('id')")
-#end
-#set($supportedtypes = "7z;ai;aiff;asc;audio;bin;bz2;c;cfc;cfm;chm;class;conf;cpp;cs;css;csv;deb;divx;doc;dot;eml;enc;file;gif;gz;hlp;htm;html;image;iso;jar;java;jpeg;jpg;js;lua;m;mm;mov;mp3;mpg;odc;odf;odg;odi;odp;ods;odt;ogg;pdf;pgp;php;pl;png;ppt;ps;py;ram;rar;rb;rm;rpm;rtf;sig;sql;swf;sxc;sxd;sxi;sxw;tar;tex;tgz;txt;vcf;video;vsd;wav;wma;wmv;xls;xml;xpi;xvid;zip")
-#set($ct = $list.get($doc.getFirstValue('content_type').split(";"),0))
-#set($filename = $doc.getFieldValue('resourcename'))
-
-#set($filetype = false)
-#set($filetype = $extMap.get($ct))
-##TODO: falling back to file extension is convenient, except when you don't have an icon for that extension
-## example "application/vnd.openxmlformats-officedocument.wordprocessingml.document" document
-## with a .docx extension. It'd be nice to fall back to an "unknown" or the existing "file" type
-## We sort of do this below, but only if the filename has no extension (anything after the last dot).
-#if(!$filetype)#set($filetype = $filename.substring($filename.lastIndexOf(".")).substring(1))#end
-##if(!$filetype)#set($filetype = "file")#end
-##if(!$supportedtypes.contains($filetype))#set($filetype = "file")#end
-
-## Small file type icons from http://www.splitbrain.org/projects/file_icons (public domain)
-
-$title #if($params.getBool('mlt', false) == false)More Like This#end