diff --git a/pom.xml b/pom.xml
index a6e66d6..1e78000 100644
--- a/pom.xml
+++ b/pom.xml
@@ -51,6 +51,11 @@ THE POSSIBILITY OF SUCH DAMAGE.
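       <artifactId>lucene-test-framework</artifactId>
       <version>${lucene.version}</version>
     </dependency>
+    <dependency>
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-grouping</artifactId>
+      <version>${lucene.version}</version>
+    </dependency>
     <dependency>
       <groupId>org.apache.httpcomponents</groupId>
       <artifactId>httpclient</artifactId>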
diff --git a/src/main/java/com/cloudant/fdblucene/benchmark/BenchmarkUtil.java b/src/main/java/com/cloudant/fdblucene/benchmark/BenchmarkUtil.java
index b526875..f172745 100644
--- a/src/main/java/com/cloudant/fdblucene/benchmark/BenchmarkUtil.java
+++ b/src/main/java/com/cloudant/fdblucene/benchmark/BenchmarkUtil.java
@@ -9,4 +9,97 @@ public enum SearchTypeEnum {
Default, BySort, ByGroup;
}
+ /**
+  * Builds an array of {@code count} random strings wrapped as {@link BytesRef}s.
+  *
+  * <p>Candidates come from {@link #randomRealisticUnicodeString(Random)}; any candidate whose
+  * {@code String.length()} is below 7 is discarded and redrawn. The result may contain duplicates.
+  */
+ public static BytesRef[] randomStrings(int count, Random random) {
+     final BytesRef[] result = new BytesRef[count];
+     for (int filled = 0; filled < count; ) {
+         final String candidate = randomRealisticUnicodeString(random);
+         if (candidate.length() < 7) {
+             // Too short: draw again without advancing.
+             continue;
+         }
+         result[filled] = new BytesRef(candidate);
+         filled++;
+     }
+     return result;
+ }
+
+ /**
+  * Returns a random string of 0 to 20 code points, all taken from the same Unicode block.
+  */
+ public static String randomRealisticUnicodeString(Random r) {
+     final int defaultMaxLength = 20;
+     return randomRealisticUnicodeString(r, defaultMaxLength);
+ }
+
+ /**
+  * Returns a random string of at most {@code maxLength} code points (possibly empty),
+  * all taken from the same Unicode block.
+  */
+ public static String randomRealisticUnicodeString(Random r, int maxLength) {
+     final int minLength = 0;
+     return randomRealisticUnicodeString(r, minLength, maxLength);
+ }
+
+ /**
+  * Returns a random string whose code point count lies between {@code minLength} and
+  * {@code maxLength}, with every code point drawn from one randomly chosen Unicode block.
+  *
+  * <p>Random draws happen in a fixed order: the length first, then the block, then one draw
+  * per code point.
+  */
+ public static String randomRealisticUnicodeString(Random r, int minLength, int maxLength) {
+     final int codePointCount = nextInt(r, minLength, maxLength);
+     final int blockIndex = r.nextInt(blockStarts.length);
+     final int firstCodePoint = blockStarts[blockIndex];
+     final int lastCodePoint = blockEnds[blockIndex];
+     final StringBuilder out = new StringBuilder();
+     int remaining = codePointCount;
+     while (remaining-- > 0) {
+         out.appendCodePoint(nextInt(r, firstCodePoint, lastCodePoint));
+     }
+     return out.toString();
+ }
+
+ /**
+  * Returns a uniformly distributed random int in the inclusive range {@code [start, end]}.
+  *
+  * <p>When {@code start == end} the value is returned without drawing from {@code r}. For ranges
+  * of at most {@code Integer.MAX_VALUE} values exactly one {@code r.nextInt(bound)} draw is made.
+  *
+  * @param r     source of randomness
+  * @param start smallest value that may be returned
+  * @param end   largest value that may be returned
+  * @return a value {@code v} with {@code start <= v <= end}
+  * @throws IllegalArgumentException if {@code end < start}
+  */
+ public static int nextInt(Random r, int start, int end) {
+     if (end < start) {
+         throw new IllegalArgumentException(
+             "end (" + end + ") must be >= start (" + start + ")");
+     }
+     if (end == start) {
+         return start;
+     }
+     // Compute the width in long: end - start + 1 overflows int once the range holds more
+     // than Integer.MAX_VALUE values (e.g. [0, Integer.MAX_VALUE]).
+     final long width = (long) end - start + 1;
+     if (width <= Integer.MAX_VALUE) {
+         return start + r.nextInt((int) width);
+     }
+     // The range is wider than any legal nextInt bound, so it contains at least half of all
+     // ints; rejection sampling stays uniform and needs fewer than two draws on average.
+     int candidate;
+     do {
+         candidate = r.nextInt();
+     } while (candidate < start || candidate > end);
+     return candidate;
+ }
+
+
+ /**
+  * First code point (inclusive) of each range that randomRealisticUnicodeString samples from.
+  * Entry i pairs with blockEnds[i], so both arrays must keep the same length and order.
+  * No range covers U+D800..U+DFFF: the entry 0xD7B0 is followed directly by 0xE000.
+  */
+ private static final int[] blockStarts = {
+ 0x0000, 0x0080, 0x0100, 0x0180, 0x0250, 0x02B0, 0x0300, 0x0370, 0x0400,
+ 0x0500, 0x0530, 0x0590, 0x0600, 0x0700, 0x0750, 0x0780, 0x07C0, 0x0800,
+ 0x0900, 0x0980, 0x0A00, 0x0A80, 0x0B00, 0x0B80, 0x0C00, 0x0C80, 0x0D00,
+ 0x0D80, 0x0E00, 0x0E80, 0x0F00, 0x1000, 0x10A0, 0x1100, 0x1200, 0x1380,
+ 0x13A0, 0x1400, 0x1680, 0x16A0, 0x1700, 0x1720, 0x1740, 0x1760, 0x1780,
+ 0x1800, 0x18B0, 0x1900, 0x1950, 0x1980, 0x19E0, 0x1A00, 0x1A20, 0x1B00,
+ 0x1B80, 0x1C00, 0x1C50, 0x1CD0, 0x1D00, 0x1D80, 0x1DC0, 0x1E00, 0x1F00,
+ 0x2000, 0x2070, 0x20A0, 0x20D0, 0x2100, 0x2150, 0x2190, 0x2200, 0x2300,
+ 0x2400, 0x2440, 0x2460, 0x2500, 0x2580, 0x25A0, 0x2600, 0x2700, 0x27C0,
+ 0x27F0, 0x2800, 0x2900, 0x2980, 0x2A00, 0x2B00, 0x2C00, 0x2C60, 0x2C80,
+ 0x2D00, 0x2D30, 0x2D80, 0x2DE0, 0x2E00, 0x2E80, 0x2F00, 0x2FF0, 0x3000,
+ 0x3040, 0x30A0, 0x3100, 0x3130, 0x3190, 0x31A0, 0x31C0, 0x31F0, 0x3200,
+ 0x3300, 0x3400, 0x4DC0, 0x4E00, 0xA000, 0xA490, 0xA4D0, 0xA500, 0xA640,
+ 0xA6A0, 0xA700, 0xA720, 0xA800, 0xA830, 0xA840, 0xA880, 0xA8E0, 0xA900,
+ 0xA930, 0xA960, 0xA980, 0xAA00, 0xAA60, 0xAA80, 0xABC0, 0xAC00, 0xD7B0,
+ 0xE000, 0xF900, 0xFB00, 0xFB50, 0xFE00, 0xFE10,
+ 0xFE20, 0xFE30, 0xFE50, 0xFE70, 0xFF00, 0xFFF0,
+ 0x10000, 0x10080, 0x10100, 0x10140, 0x10190, 0x101D0, 0x10280, 0x102A0,
+ 0x10300, 0x10330, 0x10380, 0x103A0, 0x10400, 0x10450, 0x10480, 0x10800,
+ 0x10840, 0x10900, 0x10920, 0x10A00, 0x10A60, 0x10B00, 0x10B40, 0x10B60,
+ 0x10C00, 0x10E60, 0x11080, 0x12000, 0x12400, 0x13000, 0x1D000, 0x1D100,
+ 0x1D200, 0x1D300, 0x1D360, 0x1D400, 0x1F000, 0x1F030, 0x1F100, 0x1F200,
+ 0x20000, 0x2A700, 0x2F800, 0xE0000, 0xE0100, 0xF0000, 0x100000
+ };
+
+ /**
+  * Last code point (inclusive) of each range that randomRealisticUnicodeString samples from.
+  * Entry i pairs with blockStarts[i], so both arrays must keep the same length and order.
+  */
+ private static final int[] blockEnds = {
+ 0x007F, 0x00FF, 0x017F, 0x024F, 0x02AF, 0x02FF, 0x036F, 0x03FF, 0x04FF,
+ 0x052F, 0x058F, 0x05FF, 0x06FF, 0x074F, 0x077F, 0x07BF, 0x07FF, 0x083F,
+ 0x097F, 0x09FF, 0x0A7F, 0x0AFF, 0x0B7F, 0x0BFF, 0x0C7F, 0x0CFF, 0x0D7F,
+ 0x0DFF, 0x0E7F, 0x0EFF, 0x0FFF, 0x109F, 0x10FF, 0x11FF, 0x137F, 0x139F,
+ 0x13FF, 0x167F, 0x169F, 0x16FF, 0x171F, 0x173F, 0x175F, 0x177F, 0x17FF,
+ 0x18AF, 0x18FF, 0x194F, 0x197F, 0x19DF, 0x19FF, 0x1A1F, 0x1AAF, 0x1B7F,
+ 0x1BBF, 0x1C4F, 0x1C7F, 0x1CFF, 0x1D7F, 0x1DBF, 0x1DFF, 0x1EFF, 0x1FFF,
+ 0x206F, 0x209F, 0x20CF, 0x20FF, 0x214F, 0x218F, 0x21FF, 0x22FF, 0x23FF,
+ 0x243F, 0x245F, 0x24FF, 0x257F, 0x259F, 0x25FF, 0x26FF, 0x27BF, 0x27EF,
+ 0x27FF, 0x28FF, 0x297F, 0x29FF, 0x2AFF, 0x2BFF, 0x2C5F, 0x2C7F, 0x2CFF,
+ 0x2D2F, 0x2D7F, 0x2DDF, 0x2DFF, 0x2E7F, 0x2EFF, 0x2FDF, 0x2FFF, 0x303F,
+ 0x309F, 0x30FF, 0x312F, 0x318F, 0x319F, 0x31BF, 0x31EF, 0x31FF, 0x32FF,
+ 0x33FF, 0x4DBF, 0x4DFF, 0x9FFF, 0xA48F, 0xA4CF, 0xA4FF, 0xA63F, 0xA69F,
+ 0xA6FF, 0xA71F, 0xA7FF, 0xA82F, 0xA83F, 0xA87F, 0xA8DF, 0xA8FF, 0xA92F,
+ 0xA95F, 0xA97F, 0xA9DF, 0xAA5F, 0xAA7F, 0xAADF, 0xABFF, 0xD7AF, 0xD7FF,
+ 0xF8FF, 0xFAFF, 0xFB4F, 0xFDFF, 0xFE0F, 0xFE1F,
+ 0xFE2F, 0xFE4F, 0xFE6F, 0xFEFF, 0xFFEF, 0xFFFF,
+ 0x1007F, 0x100FF, 0x1013F, 0x1018F, 0x101CF, 0x101FF, 0x1029F, 0x102DF,
+ 0x1032F, 0x1034F, 0x1039F, 0x103DF, 0x1044F, 0x1047F, 0x104AF, 0x1083F,
+ 0x1085F, 0x1091F, 0x1093F, 0x10A5F, 0x10A7F, 0x10B3F, 0x10B5F, 0x10B7F,
+ 0x10C4F, 0x10E7F, 0x110CF, 0x123FF, 0x1247F, 0x1342F, 0x1D0FF, 0x1D1FF,
+ 0x1D24F, 0x1D35F, 0x1D37F, 0x1D7FF, 0x1F02F, 0x1F09F, 0x1F1FF, 0x1F2FF,
+ 0x2A6DF, 0x2B73F, 0x2FA1F, 0xE007F, 0xE01EF, 0xFFFFF, 0x10FFFF
+ };
}
diff --git a/src/main/java/com/cloudant/fdblucene/benchmark/GroupSearchBenchmark.java b/src/main/java/com/cloudant/fdblucene/benchmark/GroupSearchBenchmark.java
new file mode 100644
index 0000000..e3a7c8b
--- /dev/null
+++ b/src/main/java/com/cloudant/fdblucene/benchmark/GroupSearchBenchmark.java
@@ -0,0 +1,116 @@
+package com.cloudant.fdblucene.benchmark;
+
+import java.util.Collection;
+import java.util.concurrent.TimeUnit;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.CachingCollector;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortField.Type;
+
+import org.apache.lucene.search.grouping.FirstPassGroupingCollector;
+import org.apache.lucene.search.grouping.TermGroupSelector;
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.Blackhole;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.RunnerException;
+import org.openjdk.jmh.runner.options.Options;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+import org.openjdk.jmh.annotations.Scope;
+
+/**
+ * JMH throughput benchmarks for first-pass grouped search on the {@code group100} field.
+ *
+ * <p>Two nested state classes run the same workload, one through {@code FDBSearchSetup} and one
+ * through {@code NIOSSearchSetup}, so their results can be compared.
+ */
+public class GroupSearchBenchmark {
+
+    /** Benchmark state and workload driven through {@code FDBSearchSetup}. */
+    @State(Scope.Benchmark)
+    public static class FDBGroupSearchBenchmark {
+
+        private FDBSearchSetup fdbSetup;
+
+        /**
+         * Runs one {@code TermQuery} on field {@code body} for a term picked at random from the
+         * setup's search term list, and collects first-pass groups on {@code group100} ordered by
+         * score and then by {@code _id}.
+         *
+         * @param blackhole receives the number of groups found (0 when none were collected)
+         */
+        @BenchmarkMode(Mode.Throughput)
+        @Fork(1)
+        @Warmup(iterations = 3, time = 10, timeUnit = TimeUnit.SECONDS)
+        @Measurement(iterations = 3, time = 10, timeUnit = TimeUnit.MINUTES)
+        @Timeout(time = 5, timeUnit = TimeUnit.MINUTES)
+        @OutputTimeUnit(TimeUnit.SECONDS)
+        @Benchmark
+        @Group("searchFDBByGroup")
+        @GroupThreads(1)
+        public void searchFDBByGroup(Blackhole blackhole) throws Exception {
+            final int randomSearchPosition = fdbSetup.random.nextInt(fdbSetup.searchTermList.size());
+            final String term = fdbSetup.searchTermList.get(randomSearchPosition);
+            final Sort groupSort = new Sort(SortField.FIELD_SCORE,
+                    new SortField("_id", Type.STRING));
+            // Diamond + wildcard keeps the collector generic without needing extra imports.
+            final FirstPassGroupingCollector<?> c1 = new FirstPassGroupingCollector<>(
+                    new TermGroupSelector("group100"), groupSort, fdbSetup.topNDocs);
+            final boolean cacheScores = true;
+            final double maxCacheRAMMB = 4.0;
+            // The caching wrapper is part of the measured work; its cache is not replayed here.
+            final CachingCollector cachedCollector =
+                    CachingCollector.create(c1, cacheScores, maxCacheRAMMB);
+            fdbSetup.searcher.search(new TermQuery(new Term("body", term)), cachedCollector);
+            final Collection<?> topGroups = c1.getTopGroups(0);
+            // getTopGroups may return null when no groups were collected; report that as zero
+            // groups so both benchmark variants do identical work on a miss.
+            blackhole.consume(topGroups == null ? 0 : topGroups.size());
+        }
+
+        /** Creates the search setup once per trial, in ByGroup mode, and opens its reader. */
+        @Setup(Level.Trial)
+        public void setup() throws Exception {
+            fdbSetup = new FDBSearchSetup();
+            fdbSetup.setSearchType(BenchmarkUtil.SearchTypeEnum.ByGroup);
+            fdbSetup.startFDBNetworking();
+            fdbSetup.createReader();
+        }
+    }
+
+    /** Benchmark state and workload driven through {@code NIOSSearchSetup}. */
+    @State(Scope.Benchmark)
+    public static class NioGroupSearchBenchmark {
+
+        private NIOSSearchSetup nioSetup;
+
+        /**
+         * Runs one {@code TermQuery} on field {@code body} for a term picked at random from the
+         * setup's search term list, and collects first-pass groups on {@code group100} ordered by
+         * score and then by {@code _id}.
+         *
+         * @param blackhole receives the number of groups found (0 when none were collected)
+         */
+        @BenchmarkMode(Mode.Throughput)
+        @Fork(1)
+        @Warmup(iterations = 3, time = 10, timeUnit = TimeUnit.SECONDS)
+        @Measurement(iterations = 3, time = 10, timeUnit = TimeUnit.MINUTES)
+        @Timeout(time = 5, timeUnit = TimeUnit.MINUTES)
+        @OutputTimeUnit(TimeUnit.SECONDS)
+        @Benchmark
+        @Group("searchNIOSByGroup")
+        @GroupThreads(1)
+        public void searchNIOSByGroup(Blackhole blackhole) throws Exception {
+            final int randomSearchPosition = nioSetup.random.nextInt(nioSetup.searchTermList.size());
+            final String term = nioSetup.searchTermList.get(randomSearchPosition);
+            final Sort groupSort = new Sort(SortField.FIELD_SCORE,
+                    new SortField("_id", Type.STRING));
+            // Diamond + wildcard keeps the collector generic without needing extra imports.
+            final FirstPassGroupingCollector<?> c1 = new FirstPassGroupingCollector<>(
+                    new TermGroupSelector("group100"), groupSort, nioSetup.topNDocs);
+            final boolean cacheScores = true;
+            final double maxCacheRAMMB = 4.0;
+            // The caching wrapper is part of the measured work; its cache is not replayed here.
+            final CachingCollector cachedCollector =
+                    CachingCollector.create(c1, cacheScores, maxCacheRAMMB);
+            nioSetup.searcher.search(new TermQuery(new Term("body", term)), cachedCollector);
+            final Collection<?> topGroups = c1.getTopGroups(0);
+            // getTopGroups may return null when no groups were collected; report that as zero
+            // groups so both benchmark variants do identical work on a miss.
+            blackhole.consume(topGroups == null ? 0 : topGroups.size());
+        }
+
+        /** Creates the search setup once per trial, in ByGroup mode, and opens its reader. */
+        @Setup(Level.Trial)
+        public void setup() throws Exception {
+            nioSetup = new NIOSSearchSetup();
+            nioSetup.setSearchType(BenchmarkUtil.SearchTypeEnum.ByGroup);
+            nioSetup.setupNIOS();
+            nioSetup.createReader();
+        }
+    }
+
+    /** Runs every benchmark whose class name matches {@code GroupSearchBenchmark}. */
+    public static void main(final String[] args) throws RunnerException {
+        final Options opt = new OptionsBuilder()
+                .include(GroupSearchBenchmark.class.getSimpleName())
+                .build();
+        new Runner(opt).run();
+    }
+
+}
+
+
diff --git a/src/main/java/com/cloudant/fdblucene/benchmark/SearchSetup.java b/src/main/java/com/cloudant/fdblucene/benchmark/SearchSetup.java
index a60ddd2..dc8dffe 100644
--- a/src/main/java/com/cloudant/fdblucene/benchmark/SearchSetup.java
+++ b/src/main/java/com/cloudant/fdblucene/benchmark/SearchSetup.java
@@ -11,6 +11,7 @@
import org.apache.lucene.codecs.lucene80.Lucene80Codec;
import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
@@ -60,6 +61,19 @@ public void setup() throws Exception {
cleanDirectory();
writer = new IndexWriter(dir, config);
random = new Random();
+
+ // Pools of random group keys, one pool per target group cardinality.
+ final BytesRef[] group100 = BenchmarkUtil.randomStrings(100, random);
+ final BytesRef[] group10K = BenchmarkUtil.randomStrings(10000, random);
+ final BytesRef[] group100K = BenchmarkUtil.randomStrings(100000, random);
+ final BytesRef[] group1M = BenchmarkUtil.randomStrings(1000000, random);
+
+ // Reusable fields whose values are overwritten per document. Each variable must carry the
+ // field name of the pool it is filled from: group10KField <- group10K, group100KField <- group100K.
+ // (The "group10K" and "group100K" names used to be attached to the opposite variables, which
+ // indexed each field with keys from the other pool.)
+ Field group100Field = new SortedDocValuesField("group100", new BytesRef());
+ Field group10KField = new SortedDocValuesField("group10K", new BytesRef());
+ Field group100KField = new SortedDocValuesField("group100K", new BytesRef());
+ Field group1MField = new SortedDocValuesField("group1M", new BytesRef());
+ Field groupBlockField = new SortedDocValuesField("groupblock", new BytesRef());
+ Field groupEndField = new StringField("groupend", "x", Field.Store.NO);
+
for (int i = 0; i < docsToIndex; i++) {
docs = new LineFileDocs(random, LuceneTestCase.DEFAULT_LINE_DOCS_FILE);
doc = docs.nextDoc();
@@ -82,6 +96,20 @@ public void setup() throws Exception {
} else if (searchType == BenchmarkUtil.SearchTypeEnum.BySort) {
doc.add(new SortedDocValuesField ("_id",
new BytesRef("doc-" + counter.incrementAndGet())));
+ } else if (searchType == BenchmarkUtil.SearchTypeEnum.ByGroup) {
+ group100Field.setBytesValue(group100[i%100]);
+ group10KField.setBytesValue(group10K[i%10000]);
+ group100KField.setBytesValue(group100K[i%100000]);
+ group1MField.setBytesValue(group1M[i%1000000]);
+
+ doc.add(new SortedDocValuesField ("_id",
+ new BytesRef("doc-" + counter.incrementAndGet())));
+ doc.add(group100Field);
+ doc.add(group10KField);
+ doc.add(group100KField);
+ doc.add(group1MField);
+ doc.add(groupBlockField);
+ doc.add(groupEndField);
} else {
throw new IllegalArgumentException();
}