From ee3c848dc7aec5a6212ebb452ff6c49c581233bd Mon Sep 17 00:00:00 2001 From: Robert Newson Date: Fri, 9 Aug 2019 21:21:26 +0100 Subject: [PATCH] Remove FDBIndex{Writer,Reader} so we can use fdb 6.0.18 --- pom.xml | 2 +- .../fdblucene/DocIdCollisionException.java | 30 -- .../com/cloudant/fdblucene/FDBAccess.java | 150 ------- .../cloudant/fdblucene/FDBIndexReader.java | 233 ----------- .../cloudant/fdblucene/FDBIndexWriter.java | 366 ------------------ .../fdblucene/FDBNumericDocValues.java | 88 ----- .../cloudant/fdblucene/FDBNumericPoint.java | 74 ---- .../cloudant/fdblucene/FDBPostingsEnum.java | 140 ------- .../java/com/cloudant/fdblucene/FDBTerms.java | 86 ---- .../com/cloudant/fdblucene/FDBTermsEnum.java | 175 --------- .../java/com/cloudant/fdblucene/Undo.java | 146 ------- .../java/com/cloudant/fdblucene/Utils.java | 3 +- .../com/cloudant/fdblucene/DocDeleteTest.java | 99 ----- .../fdblucene/FDBIndexReaderWriterTest.java | 249 ------------ .../com/cloudant/fdblucene/ScoreTest.java | 168 -------- 15 files changed, 2 insertions(+), 2007 deletions(-) delete mode 100644 src/main/java/com/cloudant/fdblucene/DocIdCollisionException.java delete mode 100644 src/main/java/com/cloudant/fdblucene/FDBAccess.java delete mode 100644 src/main/java/com/cloudant/fdblucene/FDBIndexReader.java delete mode 100644 src/main/java/com/cloudant/fdblucene/FDBIndexWriter.java delete mode 100644 src/main/java/com/cloudant/fdblucene/FDBNumericDocValues.java delete mode 100644 src/main/java/com/cloudant/fdblucene/FDBNumericPoint.java delete mode 100644 src/main/java/com/cloudant/fdblucene/FDBPostingsEnum.java delete mode 100644 src/main/java/com/cloudant/fdblucene/FDBTerms.java delete mode 100644 src/main/java/com/cloudant/fdblucene/FDBTermsEnum.java delete mode 100644 src/main/java/com/cloudant/fdblucene/Undo.java delete mode 100644 src/test/java/com/cloudant/fdblucene/DocDeleteTest.java delete mode 100644 src/test/java/com/cloudant/fdblucene/FDBIndexReaderWriterTest.java delete mode 100644 src/test/java/com/cloudant/fdblucene/ScoreTest.java diff --git a/pom.xml b/pom.xml index 03c3d7e..4b1e7b3 100644 --- a/pom.xml +++ b/pom.xml @@ -46,7 +46,7 @@ org.foundationdb fdb-java - 6.1.8 + 6.0.18 org.apache.commons diff --git a/src/main/java/com/cloudant/fdblucene/DocIdCollisionException.java b/src/main/java/com/cloudant/fdblucene/DocIdCollisionException.java deleted file mode 100644 index f4d5773..0000000 --- a/src/main/java/com/cloudant/fdblucene/DocIdCollisionException.java +++ /dev/null @@ -1,30 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -final class DocIdCollisionException extends RuntimeException { - - private static final long serialVersionUID = -8418485954882085985L; - - DocIdCollisionException(final int docID) { - super("collision on doc id " + docID); - } - - DocIdCollisionException() { - super("Unable to find a free doc id"); - } - -} diff --git a/src/main/java/com/cloudant/fdblucene/FDBAccess.java b/src/main/java/com/cloudant/fdblucene/FDBAccess.java deleted file mode 100644 index a370642..0000000 --- a/src/main/java/com/cloudant/fdblucene/FDBAccess.java +++ /dev/null @@ -1,150 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import java.nio.charset.StandardCharsets; - -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.util.BytesRef; - -import com.apple.foundationdb.Range; -import com.apple.foundationdb.subspace.Subspace; -import com.apple.foundationdb.tuple.Tuple; - -final class FDBAccess { - - static byte[] docIDKey(final Subspace index, final int docID) { - return index.pack(Tuple.from("d", docID)); - } - - static byte[] normKey(final Subspace index, final String fieldName, final int docID) { - return index.pack(Tuple.from("nv", fieldName, docID)); - } - - static Subspace normSubspace(final Subspace index, final String fieldName) { - final Tuple t = Tuple.from("nv", fieldName); - return index.get(t); - } - - static byte[] numericDocValuesKey(final Subspace index, final String fieldName, final int docID) { - return index.pack(Tuple.from("ndv", fieldName, docID)); - } - - static Subspace numericDocValuesSubspace(final Subspace index, final String fieldName) { - final Tuple t = Tuple.from("ndv", fieldName); - return index.get(t); - } - - static Range docFreqRange(final Subspace index, final String fieldName) { - return index.range(Tuple.from("df", fieldName)); - } - - static byte[] docFreqKey(final Subspace index, final String fieldName, final BytesRef term) { - return index.pack(Tuple.from("df", fieldName, Utils.toBytes(term))); - } - - static byte[] totalTermFreqKey(final Subspace index, final String fieldName, final BytesRef term) { - return index.pack(Tuple.from("ttf", fieldName, Utils.toBytes(term))); - } - - static byte[] postingsMetaKey(final Subspace index, final String fieldName, final BytesRef term, final int docID) { - final Tuple t = Tuple.from("pm", fieldName, Utils.toBytes(term), docID); - return index.pack(t); - } - - static byte[] postingsPositionKey( - final Subspace index, - final String fieldName, - final BytesRef term, - final int docID, - final int pos) { - final Tuple t = Tuple.from("pp", fieldName, Utils.toBytes(term), docID, pos); - return index.pack(t); - } - - static Subspace postingsMetaSubspace(final Subspace index, final String fieldName, final BytesRef term) { - final Tuple t = Tuple.from("pm", fieldName, Utils.toBytes(term)); - return index.get(t); - } - - static Subspace postingsPositionSubspace( - final Subspace index, - final String fieldName, - final BytesRef term, - final int docID) { - final Tuple t = Tuple.from("pp", fieldName, Utils.toBytes(term), docID); - return index.get(t); - } - - static byte[] postingsValue(final int startOffset, final int endOffset, final BytesRef payload) { - return Tuple.from(startOffset, endOffset, payload == null ? null : Utils.toBytes(payload)).pack(); - } - - static Range storedRange(final Subspace index, final int docID) { - return index.range(Tuple.from("s", docID)); - } - - static byte[] storedKey(final Subspace index, final int docID, final String fieldName) { - final Tuple t = Tuple.from("s", docID, fieldName); - return index.pack(t); - } - - static byte[] storedValue(final IndexableField field) { - Number number = field.numericValue(); - if (number != null) { - if (number instanceof Byte || number instanceof Short || number instanceof Integer) { - return Tuple.from("i", number).pack(); - } else if (number instanceof Long) { - return Tuple.from("l", number).pack(); - } else if (number instanceof Float) { - return Tuple.from("f", number).pack(); - } else if (number instanceof Double) { - return Tuple.from("d", number).pack(); - } else { - throw new IllegalArgumentException("cannot store numeric type " + number.getClass()); - } - } - - final BytesRef ref = field.binaryValue(); - if (ref != null) { - return Tuple.from("b", Utils.toBytes(ref)).pack(); - } - - final String string = field.stringValue(); - return Tuple.from("s", string.getBytes(StandardCharsets.UTF_8)).pack(); - } - - static byte[] numDocsKey(final Subspace index) { - return index.pack(Tuple.from("i", "nd")); - } - - static byte[] docCountKey(final Subspace index, final String fieldName) { - return index.pack(Tuple.from("f", fieldName, "dc")); - } - - static byte[] sumDocFreqKey(final Subspace index, final String fieldName) { - return index.pack(Tuple.from("f", fieldName, "sdf")); - } - - static byte[] sumTotalTermFreqKey(final Subspace index, final String fieldName) { - return index.pack(Tuple.from("f", fieldName, "sttf")); - } - - static Subspace undoSpace(final Subspace index, final int docID) { - return index.get(Tuple.from("undo", docID)); - } - -} diff --git a/src/main/java/com/cloudant/fdblucene/FDBIndexReader.java b/src/main/java/com/cloudant/fdblucene/FDBIndexReader.java deleted file mode 100644 index e1e53d2..0000000 --- a/src/main/java/com/cloudant/fdblucene/FDBIndexReader.java +++ /dev/null @@ -1,233 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import java.io.IOException; -import java.util.Collections; -import java.util.concurrent.CompletionException; - -import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; -import org.apache.lucene.index.Fields; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.LeafMetaData; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.NumericDocValues; -import org.apache.lucene.index.PointValues; -import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.index.SortedNumericDocValues; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.index.StoredFieldVisitor; -import org.apache.lucene.index.StoredFieldVisitor.Status; -import org.apache.lucene.index.Terms; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.IOSupplier; -import org.apache.lucene.util.Version; - -import com.apple.foundationdb.Range; -import com.apple.foundationdb.Transaction; -import com.apple.foundationdb.TransactionContext; -import com.apple.foundationdb.subspace.Subspace; -import com.apple.foundationdb.tuple.Tuple; - -public final class FDBIndexReader extends LeafReader { - - private static final LeafMetaData LEAF_META_DATA = new LeafMetaData(8, Version.LUCENE_8_1_0, null); - - private final Subspace index; - - private final ThreadLocal txnHolder = new ThreadLocal(); - - public FDBIndexReader(final Subspace indexSubspace) { - this.index = indexSubspace; - } - - public T run(final TransactionContext txc, final IOSupplier retryable) throws IOException { - try { - return txc.run(txn -> { - txnHolder.set(txn); - try { - return retryable.get(); - } catch (final IOException e) { - throw new CompletionException(e); - } finally { - txnHolder.set(null); - } - }); - } catch (final CompletionException e) { - final Throwable cause = e.getCause(); - if (cause instanceof IOException) { - throw (IOException) cause; - } - throw e; - } - } - - private Transaction getTxn() { - final Transaction result = txnHolder.get(); - if (result == null) { - throw new IllegalStateException("Reader must be called within an FDBIndexReader.run() callback function."); - } - return result; - } - - @Override - public void checkIntegrity() throws IOException { - // No-op. - } - - @Override - public void document(final int docID, final StoredFieldVisitor visitor) throws IOException { - final Range range = FDBAccess.storedRange(index, docID); - final Transaction txn = getTxn(); - txn.getRange(range).forEach(kv -> { - final Tuple keyTuple = index.unpack(kv.getKey()); - final Tuple valueTuple = Tuple.fromBytes(kv.getValue()); - - final String fieldName = keyTuple.getString(2); - - final String fieldType = valueTuple.getString(0); - final Object fieldValue = valueTuple.get(1); - - final FieldInfo fieldInfo = new FieldInfo(fieldName, 1, false, true, false, - IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, DocValuesType.NONE, -1L, - Collections.emptyMap(), 0, 0, 0, false); - - try { - if (visitor.needsField(fieldInfo) == Status.YES) { - if ("b".equals(fieldType)) { - visitor.binaryField(fieldInfo, (byte[]) fieldValue); - } else if ("d".equals(fieldType)) { - visitor.doubleField(fieldInfo, (Double) fieldValue); - } else if ("f".equals(fieldType)) { - visitor.floatField(fieldInfo, (Float) fieldValue); - } else if ("i".equals(fieldType)) { - visitor.intField(fieldInfo, (Integer) fieldValue); - } else if ("l".equals(fieldType)) { - visitor.longField(fieldInfo, (Long) fieldValue); - } else if ("s".equals(fieldType)) { - visitor.stringField(fieldInfo, (byte[]) fieldValue); - } - } - } catch (final IOException e) { - throw new CompletionException(e); - } - }); - } - - @Override - public BinaryDocValues getBinaryDocValues(final String field) throws IOException { - throw new UnsupportedOperationException("getBinaryDocValues not supported."); - } - - @Override - public CacheHelper getCoreCacheHelper() { - return null; - } - - @Override - public FieldInfos getFieldInfos() { - throw new UnsupportedOperationException("getFieldInfos not supported."); - } - - @Override - public Bits getLiveDocs() { - return null; // We'll never return a docID for a deleted document. - } - - @Override - public LeafMetaData getMetaData() { - return LEAF_META_DATA; - } - - @Override - public NumericDocValues getNormValues(final String field) throws IOException { - return new FDBNumericDocValues(getTxn(), index, field) { - - @Override - protected Subspace valueSubspace(Subspace index, String fieldName) { - return FDBAccess.normSubspace(index, fieldName); - } - - }; - } - - @Override - public NumericDocValues getNumericDocValues(final String field) throws IOException { - return new FDBNumericDocValues(getTxn(), index, field) { - - @Override - protected Subspace valueSubspace(final Subspace index, final String fieldName) { - return FDBAccess.numericDocValuesSubspace(index, fieldName); - } - - }; - } - - @Override - public PointValues getPointValues(final String field) throws IOException { - throw new UnsupportedOperationException("getPointValues not supported."); - } - - @Override - public CacheHelper getReaderCacheHelper() { - return null; - } - - @Override - public SortedDocValues getSortedDocValues(final String field) throws IOException { - throw new UnsupportedOperationException("getSortedDocValues not supported."); - } - - @Override - public SortedNumericDocValues getSortedNumericDocValues(final String field) throws IOException { - throw new UnsupportedOperationException("getSortedNumericDocValues not supported."); - } - - @Override - public SortedSetDocValues getSortedSetDocValues(final String field) throws IOException { - throw new UnsupportedOperationException("getSortedSetDocValues not supported."); - } - - @Override - public Fields getTermVectors(final int docID) throws IOException { - throw new UnsupportedOperationException("getTermVectors not supported."); - } - - @Override - public int maxDoc() { - return DocIdSetIterator.NO_MORE_DOCS - 1; - } - - @Override - public int numDocs() { - throw new UnsupportedOperationException("numDocs not supported."); - } - - @Override - public Terms terms(final String field) throws IOException { - return new FDBTerms(getTxn(), index, field); - } - - @Override - protected void doClose() throws IOException { - // No-op. - } - -} diff --git a/src/main/java/com/cloudant/fdblucene/FDBIndexWriter.java b/src/main/java/com/cloudant/fdblucene/FDBIndexWriter.java deleted file mode 100644 index 4053e0a..0000000 --- a/src/main/java/com/cloudant/fdblucene/FDBIndexWriter.java +++ /dev/null @@ -1,366 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import java.io.IOException; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Random; -import java.util.Set; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionException; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermFrequencyAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.DocValuesType; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.IndexableFieldType; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; - -import com.apple.foundationdb.Database; -import com.apple.foundationdb.MutationType; -import com.apple.foundationdb.Transaction; -import com.apple.foundationdb.async.AsyncUtil; -import com.apple.foundationdb.subspace.Subspace; -import com.apple.foundationdb.tuple.ByteArrayUtil; - -public final class FDBIndexWriter { - - private static final int RETRY_LIMIT = 100; - - private final Random random = new Random(); - - private static final byte[] EMPTY = new byte[0]; - private static final byte[] ONE = ByteArrayUtil.encodeInt(1); - private static final byte[] ZERO = ByteArrayUtil.encodeInt(0); - private static final byte[] NEGATIVE_ONE = ByteArrayUtil.encodeInt(-1); - - private final Database db; - private final Subspace index; - private final Analyzer analyzer; - - public FDBIndexWriter(final Database db, final Subspace index, final Analyzer analyzer) { - this.db = db; - this.index = index; - this.analyzer = analyzer; - } - - public int addDocument(final Document doc) throws IOException { - for (int i = 0; i < RETRY_LIMIT; i++) { - final int docID = randomDocID(); - try { - db.run(txn -> { - Utils.trace(txn, "addDocument(%d)", docID); - addDocument(txn, docID, doc).join(); - txn.mutate(MutationType.ADD, FDBAccess.numDocsKey(index), ONE); - return null; - }); - return docID; - } catch (final CompletionException e) { - if (e.getCause() instanceof DocIdCollisionException) { - // Try again. - continue; - } - throw e; - } - } - throw new DocIdCollisionException(); - } - - public int[] addDocuments(final Document... docs) throws IOException { - for (int i = 0; i < RETRY_LIMIT; i++) { - final int[] ids = generateUniqueIDs(docs); - final Collection> futures = new HashSet>(); - try { - db.run(txn -> { - Utils.trace(txn, "addDocuments(%d)", docs.length); - for (int j = 0; j < docs.length; j++) { - final CompletableFuture f = addDocument(txn, ids[j], docs[j]); - futures.add(f); - } - txn.mutate(MutationType.ADD, FDBAccess.numDocsKey(index), ByteArrayUtil.encodeInt(docs.length)); - AsyncUtil.whenAll(futures); - return null; - }); - return ids; - } catch (final CompletionException e) { - if (e.getCause() instanceof DocIdCollisionException) { - // Try again. - continue; - } - throw e; - } - } - throw new DocIdCollisionException(); - } - - private int[] generateUniqueIDs(final Document... docs) { - final int[] result = new int[docs.length]; - final Set seen = new HashSet(); - do { - seen.clear(); - for (int i = 0; i < docs.length; i++) { - result[i] = randomDocID(); - seen.add(result[i]); - } - } while (seen.size() != docs.length); - return result; - } - - private CompletableFuture addDocument(final Transaction txn, final int docID, final Document doc) - throws DocIdCollisionException { - final Undo undo = new Undo(); - - final CompletableFuture future = docIDFuture(txn, docID); - txn.set(FDBAccess.docIDKey(index, docID), EMPTY); - undo.clear(FDBAccess.docIDKey(index, docID)); - undo.mutate(MutationType.ADD, FDBAccess.numDocsKey(index), NEGATIVE_ONE); - undo.mutate(MutationType.COMPARE_AND_CLEAR, FDBAccess.numDocsKey(index), ZERO); - - for (final IndexableField field : doc) { - try { - indexField(txn, docID, field, undo); - } catch (final IOException e) { - throw new CompletionException(e); - } - } - undo.save(txn, FDBAccess.undoSpace(index, docID)); - return future.thenAccept(value -> { - if (future.join() != null) { - throw new DocIdCollisionException(docID); - } - }); - } - - private CompletableFuture docIDFuture(final Transaction txn, final int docID) { - return txn.get(FDBAccess.docIDKey(index, docID)); - } - - public void deleteDocuments(final Term... terms) { - db.run(txn -> { - Utils.trace(txn, "deleteDocument(%s)", Arrays.toString(terms)); - for (final Term term : terms) { - deleteDocuments(txn, term); - } - return null; - }); - } - - public int updateDocument(final Term term, final Document doc) throws IOException { - for (int i = 0; i < RETRY_LIMIT; i++) { - final int docID = randomDocID(); - try { - return db.run(txn -> { - Utils.trace(txn, "updateDocument(%s)", term); - deleteDocuments(txn, term); - addDocument(txn, docID, doc).join(); - txn.mutate(MutationType.ADD, FDBAccess.numDocsKey(index), ONE); - return docID; - }); - } catch (final CompletionException e) { - if (e.getCause() instanceof DocIdCollisionException) { - // Try again. - continue; - } - throw e; - } - } - throw new DocIdCollisionException(); - } - - private void deleteDocuments(final Transaction txn, final Term term) { - final Subspace postings = FDBAccess.postingsMetaSubspace(index, term.field(), term.bytes()); - txn.getRange(postings.range()).forEach(kv -> { - final int docID = (int) postings.unpack(kv.getKey()).getLong(0); - deleteDocument(txn, docID); - }); - } - - private void deleteDocument(final Transaction txn, final int docID) { - final Undo undo = new Undo(); - undo.load(txn, FDBAccess.undoSpace(index, docID)); - undo.run(txn); - txn.clear(FDBAccess.undoSpace(index, docID).range()); - } - - private void indexField(final Transaction txn, final int docID, final IndexableField field, final Undo undo) - throws IOException { - final String fieldName = field.name(); - final IndexableFieldType fieldType = field.fieldType(); - - if (fieldType.indexOptions() != IndexOptions.NONE) { - indexInvertedField(txn, docID, fieldName, field, undo); - } - - if (fieldType.stored()) { - indexStoredField(txn, docID, fieldName, field, undo); - } - - final DocValuesType dvType = fieldType.docValuesType(); - if (dvType != DocValuesType.NONE) { - indexDocValue(txn, dvType, docID, fieldName, field, undo); - } - - if (fieldType.pointDataDimensionCount() > 0) { - indexPoint(txn, docID, fieldName, field, undo); - } - - txn.mutate(MutationType.ADD, FDBAccess.sumDocFreqKey(index, fieldName), ONE); - undo.mutate(MutationType.ADD, FDBAccess.sumDocFreqKey(index, fieldName), NEGATIVE_ONE); - undo.mutate(MutationType.COMPARE_AND_CLEAR, FDBAccess.sumDocFreqKey(index, fieldName), ZERO); - } - - private void indexInvertedField( - final Transaction txn, - final int docID, - final String fieldName, - final IndexableField field, - final Undo undo) throws IOException { - - try (final TokenStream stream = field.tokenStream(analyzer, null)) { - final TermToBytesRefAttribute termAttribute = stream.getAttribute(TermToBytesRefAttribute.class); - final TermFrequencyAttribute termFreqAttribute = stream.addAttribute(TermFrequencyAttribute.class); - final PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class); - final OffsetAttribute offsetAttribute = stream.addAttribute(OffsetAttribute.class); - final PayloadAttribute payloadAttribute = stream.addAttribute(PayloadAttribute.class); - - int pos = 0; - stream.reset(); - final Map termFreqs = new HashMap(); - int length = 0; - - Set seen = new HashSet(); - - while (stream.incrementToken()) { - final int posIncr = posIncrAttribute.getPositionIncrement(); - final int startOffset = offsetAttribute.startOffset(); - final int endOffset = offsetAttribute.endOffset(); - final int termFreq = termFreqAttribute.getTermFrequency(); - final BytesRef term = termAttribute.getBytesRef(); - final BytesRef payload = payloadAttribute.getPayload(); - - termFreqs.compute(BytesRef.deepCopyOf(term), (k, v) -> { - return (v == null) ? 1 : v + 1; - }); - - if (!seen.contains(term)) { - txn.mutate(MutationType.ADD, FDBAccess.docFreqKey(index, fieldName, term), ONE); - undo.mutate(MutationType.ADD, FDBAccess.docFreqKey(index, fieldName, term), NEGATIVE_ONE); - undo.mutate(MutationType.COMPARE_AND_CLEAR, FDBAccess.docFreqKey(index, fieldName, term), ZERO); - seen.add(term); - } - txn.mutate( - MutationType.ADD, - FDBAccess.totalTermFreqKey(index, fieldName, term), - ByteArrayUtil.encodeInt(termFreq)); - undo.mutate( - MutationType.ADD, - FDBAccess.totalTermFreqKey(index, fieldName, term), - ByteArrayUtil.encodeInt(-termFreq)); - undo.mutate(MutationType.COMPARE_AND_CLEAR, FDBAccess.totalTermFreqKey(index, fieldName, term), ZERO); - - final byte[] postingsKey = FDBAccess.postingsPositionKey(index, fieldName, term, docID, pos); - final byte[] postingsValue = FDBAccess.postingsValue(startOffset, endOffset, payload); - txn.set(postingsKey, postingsValue); - undo.clear(postingsKey); - pos += posIncr; - length++; - } - stream.end(); - - if (!termFreqs.isEmpty()) { - termFreqs.forEach((k, v) -> { - final byte[] key = FDBAccess.postingsMetaKey(index, fieldName, k, docID); - txn.set(key, ByteArrayUtil.encodeInt(v)); - undo.clear(key); - }); - - txn.set(FDBAccess.normKey(index, fieldName, docID), ByteArrayUtil.encodeInt(length)); - undo.clear(FDBAccess.normKey(index, fieldName, docID)); - - txn.mutate(MutationType.ADD, FDBAccess.docCountKey(index, fieldName), ONE); - undo.mutate(MutationType.ADD, FDBAccess.docCountKey(index, fieldName), NEGATIVE_ONE); - undo.mutate(MutationType.COMPARE_AND_CLEAR, FDBAccess.docCountKey(index, fieldName), ZERO); - - txn.mutate( - MutationType.ADD, - FDBAccess.sumTotalTermFreqKey(index, fieldName), - ByteArrayUtil.encodeInt(length)); - undo.mutate( - MutationType.ADD, - FDBAccess.sumTotalTermFreqKey(index, fieldName), - ByteArrayUtil.encodeInt(-length)); - undo.mutate(MutationType.COMPARE_AND_CLEAR, FDBAccess.sumTotalTermFreqKey(index, fieldName), ZERO); - } - } - } - - private void indexStoredField( - final Transaction txn, - final int docID, - final String fieldName, - final IndexableField field, - final Undo undo) { - final byte[] key = FDBAccess.storedKey(index, docID, fieldName); - final byte[] value = FDBAccess.storedValue(field); - txn.set(key, value); - undo.clear(key); - } - - private void indexDocValue( - final Transaction txn, - final DocValuesType dvType, - final int docID, - final String fieldName, - final IndexableField field, - final Undo undo) { - switch (dvType) { - case NUMERIC: - final byte[] key = FDBAccess.numericDocValuesKey(index, fieldName, docID); - txn.set(key, ByteArrayUtil.encodeInt(field.numericValue().longValue())); - undo.clear(key); - break; - default: - throw new IllegalArgumentException("non-numeric DocValue not supported"); - } - } - - private void indexPoint( - final Transaction txn, - final int docID, - final String fieldName, - final IndexableField field, - final Undo undo) { - throw new IllegalArgumentException("Points not supported"); - } - - private int randomDocID() { - return random.nextInt(DocIdSetIterator.NO_MORE_DOCS); - } - -} diff --git a/src/main/java/com/cloudant/fdblucene/FDBNumericDocValues.java b/src/main/java/com/cloudant/fdblucene/FDBNumericDocValues.java deleted file mode 100644 index 6afc520..0000000 --- a/src/main/java/com/cloudant/fdblucene/FDBNumericDocValues.java +++ /dev/null @@ -1,88 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import java.io.IOException; -import java.util.List; - -import org.apache.lucene.index.NumericDocValues; - -import com.apple.foundationdb.KeyValue; -import com.apple.foundationdb.TransactionContext; -import com.apple.foundationdb.subspace.Subspace; -import com.apple.foundationdb.tuple.ByteArrayUtil; - -abstract class FDBNumericDocValues extends NumericDocValues { - - private final TransactionContext txc; - private final Subspace index; - private final String fieldName; - - private int docID = -1; - private long value; - - public FDBNumericDocValues(final TransactionContext txc, final Subspace index, final String fieldName) { - this.txc = txc; - this.index = index; - this.fieldName = fieldName; - } - - protected abstract Subspace valueSubspace(final Subspace index, final String fieldName); - - @Override - public final long longValue() throws IOException { - return value; - } - - @Override - public final boolean advanceExact(final int target) throws IOException { - final Subspace subspace = valueSubspace(index, fieldName); - final byte[] begin = subspace.pack(target); - final byte[] end = subspace.range().end; - - return txc.run(txn -> { - final List list = txn.getRange(begin, end, 1).asList().join(); - if (list.isEmpty()) { - return false; - } - final KeyValue kv = list.get(0); - this.docID = (int) subspace.unpack(kv.getKey()).getLong(0); - this.value = ByteArrayUtil.decodeInt(kv.getValue()); - return this.docID == target; - }); - } - - @Override - public final int docID() { - return docID; - } - - @Override - public final int nextDoc() throws IOException { - throw new UnsupportedOperationException("nextDoc not supported."); - } - - @Override - public final int advance(int target) throws IOException { - throw new UnsupportedOperationException("advance not supported."); - } - - @Override - public final long cost() { - return 1L; - } - -} diff --git a/src/main/java/com/cloudant/fdblucene/FDBNumericPoint.java b/src/main/java/com/cloudant/fdblucene/FDBNumericPoint.java deleted file mode 100644 index 4c04ac4..0000000 --- a/src/main/java/com/cloudant/fdblucene/FDBNumericPoint.java +++ /dev/null @@ -1,74 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.NumericUtils; - -public class FDBNumericPoint extends Field { - - public final static FieldType TYPE; - static { - TYPE = new FieldType(); - TYPE.setIndexOptions(IndexOptions.DOCS); - TYPE.setTokenized(false); - TYPE.setOmitNorms(true); - TYPE.freeze(); - } - - public FDBNumericPoint(final String name, final Number value) { - super(name, numberToBytes(value), TYPE); - } - - public static Query newExactQuery(final String field, final Number value) { - return new TermQuery(new Term(field, numberToRef(value))); - } - - public static Query newRangeQuery(final String field, final Number lowerValue, final Number upperValue) { - return new TermRangeQuery(field, numberToRef(lowerValue), numberToRef(upperValue), true, true); - } - - static BytesRef numberToRef(final Number number) { - return new BytesRef(numberToBytes(number)); - } - - static byte[] numberToBytes(final Number number) { - if (number instanceof Double) { - final long asLong = NumericUtils.doubleToSortableLong((Double) number); - return numberToBytes(asLong); - } else if (number instanceof Float) { - final int asInt = NumericUtils.floatToSortableInt((Float) number); - return numberToBytes(asInt); - } else if (number instanceof Long) { - final byte[] result = new byte[8]; - NumericUtils.longToSortableBytes((Long) number, result, 0); - return result; - } else if (number instanceof Integer) { - final byte[] result = new byte[4]; - NumericUtils.intToSortableBytes((Integer) number, result, 0); - return result; - } - throw new IllegalArgumentException(number + " not supported."); - } - -} \ No newline at end of file diff --git a/src/main/java/com/cloudant/fdblucene/FDBPostingsEnum.java b/src/main/java/com/cloudant/fdblucene/FDBPostingsEnum.java deleted file mode 100644 index bba02a5..0000000 --- a/src/main/java/com/cloudant/fdblucene/FDBPostingsEnum.java +++ /dev/null @@ -1,140 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import java.io.IOException; - -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; - -import com.apple.foundationdb.KeyValue; -import com.apple.foundationdb.Transaction; -import com.apple.foundationdb.subspace.Subspace; -import com.apple.foundationdb.tuple.ByteArrayUtil; -import com.apple.foundationdb.tuple.Tuple; - -class FDBPostingsEnum extends PostingsEnum { - - private final Transaction txn; - private final Subspace index; - private final String fieldName; - private final BytesRef term; - - private int freq = 1; - private int pos = -1; - private int startOffset = -1; - private int endOffset = -1; - private BytesRef payload = null; - private int docID = -1; - - public FDBPostingsEnum(final Transaction txn, final Subspace index, final String fieldName, final BytesRef term) { - this.txn = txn; - this.index = index; - this.fieldName = fieldName; - this.term = BytesRef.deepCopyOf(term); - } - - @Override - public int freq() throws IOException { - return freq; - } - - @Override - public int nextPosition() throws IOException { - final Subspace postingsSubspace = FDBAccess.postingsPositionSubspace(index, fieldName, term, this.docID); - final byte[] begin = postingsSubspace.pack(this.pos + 1); - final byte[] end = postingsSubspace.range().end; - - return txn.getRange(begin, end, 1).asList().thenApply(result -> { - if (result.isEmpty()) { - throw new Error("nextPosition called too many times"); - } - final KeyValue kv = result.get(0); - final Tuple kt = postingsSubspace.unpack(kv.getKey()); - final Tuple vt = Tuple.fromBytes(kv.getValue()); - this.pos = (int) kt.getLong(0); - this.startOffset = (int) vt.getLong(0); - this.endOffset = (int) vt.getLong(1); - final byte[] payload = vt.getBytes(2); - if (payload == null) { - this.payload = null; - } else { - this.payload = new BytesRef(payload); - } - return this.pos; - }).join(); - } - - @Override - public int startOffset() throws IOException { - return startOffset; - } - - @Override - public int endOffset() throws IOException { - return endOffset; - } - - @Override - public BytesRef getPayload() throws IOException { - return payload; - } - - @Override - public int docID() { - return docID; - } - - @Override - public int nextDoc() throws IOException { - return advance(docID + 1); - } - - @Override - public int advance(final int target) throws IOException { - final Subspace postingsSubspace = FDBAccess.postingsMetaSubspace(index, fieldName, term); - final byte[] begin = postingsSubspace.pack(target); - final byte[] end = postingsSubspace.range().end; - - return txn.getRange(begin, end, 1).asList().thenApply(result -> { - if (result.isEmpty()) { - this.docID = DocIdSetIterator.NO_MORE_DOCS; - return DocIdSetIterator.NO_MORE_DOCS; - } - - final KeyValue kv = result.get(0); - final Tuple kt = postingsSubspace.unpack(kv.getKey()); - this.docID = (int) kt.getLong(0); - this.freq = (int) ByteArrayUtil.decodeInt(kv.getValue()); - this.pos = -1; - this.startOffset = -1; - this.endOffset = -1; - this.payload = null; - return this.docID; - }).join(); - } - - @Override - public long cost() { - return 1L; - } - - public String toString() { - return term.utf8ToString(); - } - -} diff --git a/src/main/java/com/cloudant/fdblucene/FDBTerms.java b/src/main/java/com/cloudant/fdblucene/FDBTerms.java deleted file mode 100644 index cb3b463..0000000 --- a/src/main/java/com/cloudant/fdblucene/FDBTerms.java +++ /dev/null @@ -1,86 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import java.io.IOException; - -import org.apache.lucene.index.Terms; -import org.apache.lucene.index.TermsEnum; - -import com.apple.foundationdb.Transaction; -import com.apple.foundationdb.subspace.Subspace; - -final class FDBTerms extends Terms { - - private final Transaction txn; - private final Subspace index; - private final String fieldName; - - public FDBTerms(final Transaction txn, final Subspace index, final String fieldName) { - this.txn = txn; - this.index = index; - this.fieldName = fieldName; - } - - @Override - public TermsEnum iterator() throws IOException { - return new FDBTermsEnum(txn, index, fieldName); - } - - @Override - public long size() throws IOException { - throw new UnsupportedOperationException("size not supported."); - } - - @Override - public long getSumTotalTermFreq() throws IOException { - final byte[] key = FDBAccess.sumTotalTermFreqKey(index, fieldName); - return Utils.getOrDefault(txn, key, 0); - } - - @Override - public long getSumDocFreq() throws IOException { - final byte[] key = FDBAccess.sumDocFreqKey(index, fieldName); - return Utils.getOrDefault(txn, key, 0); - } - - @Override - public int getDocCount() throws IOException { - final byte[] key = FDBAccess.docCountKey(index, fieldName); - return Utils.getOrDefault(txn, key, 0); - } - - @Override - public boolean hasFreqs() { - throw new UnsupportedOperationException("hasFreqs not supported."); - } - - @Override - public boolean hasOffsets() { - throw new UnsupportedOperationException("hasOffsets not supported."); - } - - @Override - public boolean hasPositions() { - return true; - } - - @Override - public boolean hasPayloads() { - throw new UnsupportedOperationException("hasPayloads not supported."); - } - -} diff --git a/src/main/java/com/cloudant/fdblucene/FDBTermsEnum.java b/src/main/java/com/cloudant/fdblucene/FDBTermsEnum.java deleted file mode 100644 index acba03c..0000000 --- a/src/main/java/com/cloudant/fdblucene/FDBTermsEnum.java +++ /dev/null @@ -1,175 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import java.io.IOException; -import java.util.concurrent.CompletableFuture; - -import org.apache.lucene.index.ImpactsEnum; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.SlowImpactsEnum; -import org.apache.lucene.index.TermState; -import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.util.AttributeSource; -import org.apache.lucene.util.BytesRef; - -import com.apple.foundationdb.KeySelector; -import com.apple.foundationdb.KeyValue; -import com.apple.foundationdb.Range; -import com.apple.foundationdb.Transaction; -import com.apple.foundationdb.subspace.Subspace; -import com.apple.foundationdb.tuple.ByteArrayUtil; -import com.apple.foundationdb.tuple.Tuple; - -final class FDBTermsEnum extends TermsEnum { - - private static final TermState TERM_STATE = new TermState() { - - @Override - public void copyFrom(TermState other) { - // no-op. - } - - }; - - private static final BytesRef END = new BytesRef(new byte[] { (byte) 0xff }); - - private final Transaction txn; - private final Subspace index; - private final String fieldName; - - private BytesRef term = new BytesRef(); - private int docFreq; - private int totalTermFreq; - - public FDBTermsEnum(final Transaction txn, final Subspace index, final String fieldName) { - this.txn = txn; - this.index = index; - this.fieldName = fieldName; - } - - @Override - public BytesRef next() throws IOException { - final KeySelector begin = KeySelector.firstGreaterThan(FDBAccess.docFreqKey(index, fieldName, term)); - final KeySelector end = KeySelector.lastLessThan(FDBAccess.docFreqKey(index, fieldName, END)); - - return txn.getRange(begin, end, 1).asList().thenApply(result -> { - if (result.isEmpty()) { - return null; - } - updateState(result.get(0)); - return term; - }).join(); - } - - @Override - public AttributeSource attributes() { - throw new UnsupportedOperationException("attributes not supported."); - } - - @Override - public boolean seekExact(final BytesRef text) throws IOException { - final byte[] docFreqKey = FDBAccess.docFreqKey(index, fieldName, text); - final CompletableFuture docFreqFuture = txn.get(docFreqKey); - - final byte[] totalTermFreqKey = FDBAccess.totalTermFreqKey(index, fieldName, text); - final CompletableFuture totalTermFreqFuture = txn.get(totalTermFreqKey); - - final byte[] docFreq = docFreqFuture.join(); - final byte[] totalTermFreq = totalTermFreqFuture.join(); - - if (docFreq != null) { - this.term = text; - this.docFreq = (int) ByteArrayUtil.decodeInt(docFreq); - this.totalTermFreq = (int) ByteArrayUtil.decodeInt(totalTermFreq); - } - - return docFreq != null; - } - - @Override - public SeekStatus seekCeil(BytesRef text) throws IOException { - final byte[] docFreqKey = FDBAccess.docFreqKey(index, fieldName, text); - final Range docFreqRange = FDBAccess.docFreqRange(index, fieldName); - return txn.getRange(docFreqKey, docFreqRange.end, 1).asList().thenApply(result -> { - if (result.isEmpty()) { - return SeekStatus.END; - } - updateState(result.get(0)); - if (term.bytesEquals(text)) { - return SeekStatus.FOUND; - } else { - return SeekStatus.NOT_FOUND; - } - }).join(); - } - - @Override - public void seekExact(long ord) throws IOException { - throw new UnsupportedOperationException("seekExact not supported."); - } - - @Override - public void seekExact(final BytesRef term, final TermState state) throws IOException { - assert state == TERM_STATE; - seekExact(term); - } - - @Override - public BytesRef term() throws IOException { - return term; - } - - @Override - public long ord() throws IOException { - throw new UnsupportedOperationException("ord not supported."); - } - - @Override - public int docFreq() throws IOException { - return docFreq; - } - - @Override - public long totalTermFreq() throws IOException { - return totalTermFreq; - } - - @Override - public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException { - return new FDBPostingsEnum(txn, index, fieldName, term); - } - - @Override - public ImpactsEnum impacts(int flags) throws IOException { - return new SlowImpactsEnum(postings(null, flags)); - } - - @Override - public TermState termState() throws IOException { - return TERM_STATE; - } - - private void updateState(final KeyValue kv) { - final Tuple keyTuple = index.unpack(kv.getKey()); - this.term = new BytesRef(keyTuple.getBytes(2)); - this.docFreq = (int) ByteArrayUtil.decodeInt(kv.getValue()); - final byte[] totalTermFreqKey = FDBAccess.totalTermFreqKey(index, fieldName, term); - final byte[] totalTermFreq = txn.get(totalTermFreqKey).join(); - this.totalTermFreq = (int) ByteArrayUtil.decodeInt(totalTermFreq); - } - -} diff --git a/src/main/java/com/cloudant/fdblucene/Undo.java b/src/main/java/com/cloudant/fdblucene/Undo.java deleted file mode 100644 index f373641..0000000 --- a/src/main/java/com/cloudant/fdblucene/Undo.java +++ /dev/null @@ -1,146 +0,0 @@ -package com.cloudant.fdblucene; - -import java.io.ByteArrayInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.zip.DeflaterOutputStream; -import java.util.zip.InflaterInputStream; - -import com.apple.foundationdb.MutationType; -import com.apple.foundationdb.Range; -import com.apple.foundationdb.Transaction; -import com.apple.foundationdb.subspace.Subspace; -import com.apple.foundationdb.tuple.Tuple; - -/** - * Record transaction steps for later replay. - */ -public final class Undo { - - private static final Map BY_CODE; - - static { - final Map map = new HashMap(); - for (final MutationType type : MutationType.values()) { - map.put(type.code(), type); - } - BY_CODE = Collections.unmodifiableMap(map); - } - - private static final int CLEAR_KEY = 0; - private static final int CLEAR_RANGE = 1; - private static final int MUTATE = 2; - - private static final int MAX_VALUE_LENGTH_BYTES = 4000; - - private final List actions = new ArrayList(); - - public void clear(final byte[] key) { - actions.add(Tuple.from(CLEAR_KEY, key)); - } - - public void clear(final byte[] beginKey, final byte[] endKey) { - actions.add(Tuple.from(CLEAR_RANGE, beginKey, endKey)); - } - - public void clear(final Range range) { - actions.add(Tuple.from(CLEAR_RANGE, range.begin, range.end)); - } - - public void mutate(final MutationType optype, final byte[] key, final byte[] param) { - actions.add(Tuple.from(MUTATE, optype.code(), key, param)); - } - - public void run(final Transaction txn) { - for (Tuple action : actions) { - final int type = (int) action.getLong(0); - switch (type) { - case CLEAR_KEY: - txn.clear(action.getBytes(1)); - break; - case CLEAR_RANGE: - txn.clear(action.getBytes(1), action.getBytes(2)); - break; - case MUTATE: - int code = (int) action.getLong(1); - txn.mutate(BY_CODE.get(code), action.getBytes(2), action.getBytes(3)); - break; - default: - assert false; - break; - } - } - } - - public void save(final Transaction txn, final Subspace subspace) { - // Marshal - final byte[] marshalled; - try (final ByteArrayOutputStream bos = new ByteArrayOutputStream(); - final DeflaterOutputStream fos = new DeflaterOutputStream(bos); - final DataOutputStream dos = new DataOutputStream(fos)) { - dos.writeByte(1); - dos.writeShort(actions.size()); - for (final Tuple action : actions) { - final byte[] packed = action.pack(); - dos.writeShort(packed.length); - dos.write(packed); - } - fos.finish(); - marshalled = bos.toByteArray(); - } catch (final IOException e) { - throw new Error("ByteArrayOutputStream threw IOException", e); - } - - // Persist - txn.clear(subspace.range()); - for (int i = 0; i < marshalled.length; i += MAX_VALUE_LENGTH_BYTES) { - final int len = Math.min(MAX_VALUE_LENGTH_BYTES, marshalled.length - i); - final byte[] chunk = Arrays.copyOfRange(marshalled, i, len); - txn.set(subspace.pack(i), chunk); - } - } - - public void load(final Transaction txn, final Subspace subspace) { - actions.clear(); - try (final ByteArrayOutputStream bos = new ByteArrayOutputStream()) { - // Fetch - txn.getRange(subspace.range()).forEach(kv -> { - try { - bos.write(kv.getValue()); - } catch (final IOException e) { - assert false; - } - }); - - // Unmarshal - final ByteArrayInputStream bis = new ByteArrayInputStream(bos.toByteArray()); - final InflaterInputStream iis = new InflaterInputStream(bis); - final DataInputStream dis = new DataInputStream(iis); - final int version = dis.readByte(); - if (version != 1) { - throw new IllegalArgumentException(); - } - final int count = dis.readShort(); - for (int i = 0; i < count; i++) { - final byte[] bytes = new byte[dis.readShort()]; - dis.readFully(bytes); - final Tuple action = Tuple.fromBytes(bytes); - actions.add(action); - } - } catch (final IOException e) { - throw new Error("ByteArrayOutputStream threw IOException", e); - } - } - - public String toString() { - return actions.toString(); - } -} diff --git a/src/main/java/com/cloudant/fdblucene/Utils.java b/src/main/java/com/cloudant/fdblucene/Utils.java index a0dbba0..17901c0 100644 --- a/src/main/java/com/cloudant/fdblucene/Utils.java +++ b/src/main/java/com/cloudant/fdblucene/Utils.java @@ -26,8 +26,7 @@ class Utils { static void trace(final Transaction txn, final String format, final Object... args) { if (System.getenv("FDB_NETWORK_OPTION_TRACE_ENABLE") != null) { final String str = String.format(format, args); - txn.options().setDebugTransactionIdentifier(str); - txn.options().setLogTransaction(); + txn.options().setTransactionLoggingEnable(str); } } diff --git a/src/test/java/com/cloudant/fdblucene/DocDeleteTest.java b/src/test/java/com/cloudant/fdblucene/DocDeleteTest.java deleted file mode 100644 index 4deef5c..0000000 --- a/src/test/java/com/cloudant/fdblucene/DocDeleteTest.java +++ /dev/null @@ -1,99 +0,0 @@ -package com.cloudant.fdblucene; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - -import java.util.Arrays; -import java.util.concurrent.atomic.AtomicInteger; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.DoubleDocValuesField; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.StoredField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.Term; -import org.junit.Test; - -import com.apple.foundationdb.subspace.Subspace; -import com.apple.foundationdb.tuple.Tuple; - -public class DocDeleteTest extends BaseFDBTest { - - private static boolean VERBOSE = false; - - @Test - public void delete() throws Exception { - final Analyzer analyzer = new StandardAnalyzer(); - final FDBIndexWriter writer = new FDBIndexWriter(DB, subspace, analyzer); - - writer.addDocument(doc("foo", "bar baz, foobar.")); - assertTrue("nothing got indexed", debrisReport(subspace) > 0); - writer.deleteDocuments(new Term("_id", "foo")); - assertEquals("debris left in index.", 0, debrisReport(subspace)); - } - - @Test - public void update() throws Exception { - final Analyzer analyzer = new StandardAnalyzer(); - final FDBIndexWriter writer = new FDBIndexWriter(DB, subspace, analyzer); - - writer.addDocument(doc("foo", "bar baz, foobar.")); - int debrisCount = debrisReport(subspace); - assertTrue("nothing got indexed", debrisCount > 0); - - writer.updateDocument(new Term("_id", "foo"), doc("foo", "bar baz, foobar.")); - assertEquals("debris left in index.", debrisCount, debrisReport(subspace)); - } - - @Test - public void multipleDocs() throws Exception { - final Analyzer analyzer = new StandardAnalyzer(); - final FDBIndexWriter writer = new FDBIndexWriter(DB, subspace, analyzer); - - for (int i = 0; i < 100; i++) { - writer.addDocument(doc("doc-" + i, "bar baz, foobar.")); - } - assertTrue("nothing got indexed", debrisReport(subspace) > 0); - - for (int i = 0; i < 100; i++) { - writer.deleteDocuments(new Term("_id", "doc-" + i)); - writer.deleteDocuments(new Term("_id", "foo")); - } - - assertEquals("debris left in index.", 0, debrisReport(subspace)); - } - - private Document doc(final String id, final String body) { - final Document result = new Document(); - result.add(new StringField("_id", id, Store.YES)); - result.add(new TextField("stored-body", body, Store.YES)); - result.add(new TextField("unstored-body", body, Store.NO)); - result.add(new DoubleDocValuesField("double-sort", 12.5)); - result.add(new NumericDocValuesField("long-sort", 15)); - result.add(new FDBNumericPoint("double-pt", 5.5)); - result.add(new StoredField("float", 123.456f)); - - return result; - } - - private int debrisReport(final Subspace s) { - final AtomicInteger counter = new AtomicInteger(); - DB.run(txn -> { - txn.getRange(s.range()).forEach(kv -> { - final Tuple key = subspace.unpack(kv.getKey()); - final String value = Arrays.toString(kv.getValue()); - if (VERBOSE) { - System.out.printf("%s %s\n", key, value); - } - counter.getAndIncrement(); - }); - return null; - }); - return counter.get(); - } - -} diff --git a/src/test/java/com/cloudant/fdblucene/FDBIndexReaderWriterTest.java b/src/test/java/com/cloudant/fdblucene/FDBIndexReaderWriterTest.java deleted file mode 100644 index dd6e3d3..0000000 --- a/src/test/java/com/cloudant/fdblucene/FDBIndexReaderWriterTest.java +++ /dev/null @@ -1,249 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - -import java.io.IOException; -import java.util.Collections; -import java.util.Random; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.DoubleDocValuesField; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.StoredField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.PostingsEnum; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanClause.Occur; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.FieldDoc; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.SortField.Type; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopFieldDocs; -import org.apache.lucene.util.BytesRef; -import org.junit.Before; -import org.junit.Test; - -import com.apple.foundationdb.Transaction; - -public class FDBIndexReaderWriterTest extends BaseFDBTest { - - private Random random = new Random(); - - private FDBIndexReader reader; - private IndexSearcher searcher; - - private int docID1; - private int docID2; - private int docID3; - - @Before - public void setupIndex() throws Exception { - final Analyzer analyzer = new StandardAnalyzer(); - - final FDBIndexWriter writer = new FDBIndexWriter(DB, subspace, analyzer); - - docID1 = writer.addDocument(doc("hello", "abc def ghi")); - final int[] ids = writer.addDocuments(doc("bye", "abc ghi def"), doc("hell", "abc def ghi def def")); - docID2 = ids[0]; - docID3 = ids[1]; - - reader = new FDBIndexReader(subspace); - this.searcher = new IndexSearcher(reader); - } - - @Test - public void allStoredFields() throws Exception { - reader.run(DB, () -> { // txn around search and doc fetch - final TopDocs topDocs = searcher.search(new TermQuery(new Term("body", "def")), 5); - assertEquals(3, topDocs.totalHits.value); - - final Document hit = reader.document(topDocs.scoreDocs[0].doc); - assertEquals(123.456f, hit.getField("float").numericValue()); - assertEquals(123.456, hit.getField("double").numericValue()); - return null; - }); - } - - @Test - public void someStoredFields() throws Exception { - reader.run(DB, () -> { // txn around search and doc fetch - final TopDocs topDocs = searcher.search(new TermQuery(new Term("body", "def")), 5); - assertEquals(3, topDocs.totalHits.value); - - final Document hit = reader.document(topDocs.scoreDocs[0].doc, Collections.singleton("float")); - assertEquals(123.456f, hit.getField("float").numericValue()); - assertNull(hit.getField("double")); - return null; - }); - } - - @Test - public void termQuery() throws Exception { - final TopDocs topDocs = search(new TermQuery(new Term("body", "def")), 5); - assertEquals(3, topDocs.totalHits.value); - } - - @Test - public void prefixQuery() throws Exception { - final TopDocs topDocs = search(new PrefixQuery(new Term("_id", "hel")), 2); - assertEquals(2, topDocs.totalHits.value); - } - - @Test - public void prefixQuery2() throws Exception { - final TopDocs topDocs = search(new PrefixQuery(new Term("_id", "hell")), 2); - assertEquals(2, topDocs.totalHits.value); - } - - @Test - public void phraseQuery() throws Exception { - final TopDocs topDocs = search(new PhraseQuery("body", "abc", "def"), 1); - assertEquals(2, topDocs.totalHits.value); - } - - @Test - public void phraseQuery2() throws Exception { - final TopDocs topDocs = search(new PhraseQuery("body", "def", "def"), 1); - assertEquals(1, topDocs.totalHits.value); - } - - @Test - public void sorting() throws Exception { - final Query query = new TermQuery(new Term("body", "def")); - final Sort sort = new Sort(new SortField("double-sort", Type.DOUBLE)); - final TopFieldDocs topDocs = search(query, 10, sort); - assertEquals(3, topDocs.totalHits.value); - double low = 0.0; - for (int i = 0; i < topDocs.totalHits.value; i++) { - final Double order = (Double) ((FieldDoc) topDocs.scoreDocs[i]).fields[0]; - assertTrue(order >= low); - low = order; - } - } - - @Test - public void testFDBPostingsEnum() throws Exception { - final Transaction txn = DB.createTransaction(); - try { - final PostingsEnum p = new FDBPostingsEnum(txn, subspace, "body", new BytesRef("def")); - - for (int i = 0; i < 3; i++) { - int nextID = p.nextDoc(); - if (nextID == docID1) { - assertEquals(1, p.nextPosition()); - } - if (nextID == docID2) { - assertEquals(2, p.nextPosition()); - } - if (nextID == docID3) { - assertEquals(1, p.nextPosition()); - assertEquals(3, p.nextPosition()); - assertEquals(4, p.nextPosition()); - } - } - assertEquals(DocIdSetIterator.NO_MORE_DOCS, p.nextDoc()); - } finally { - txn.commit().join(); - txn.close(); - } - } - - @Test - public void booleanQuery() throws Exception { - final BooleanQuery.Builder builder = new BooleanQuery.Builder(); - builder.add(new TermQuery(new Term("body", "def")), Occur.MUST); - builder.add(new PrefixQuery(new Term("_id", "hell")), Occur.MUST); - - final TopDocs topDocs = search(builder.build(), 1); - assertEquals(2, topDocs.totalHits.value); - } - - @Test - public void doubleExactQuery() throws Exception { - Query query = FDBNumericPoint.newExactQuery("double-pt", 5.5); - final TopDocs topDocs = search(query, 5); - assertEquals(3, topDocs.totalHits.value); - } - - @Test - public void doubleRangeQuery() throws Exception { - Query query = FDBNumericPoint.newRangeQuery("double-pt", 4.0, 6.0); - final TopDocs topDocs = search(query, 5); - assertEquals(3, topDocs.totalHits.value); - } - - @Test - public void longExactQuery() throws Exception { - Query query = FDBNumericPoint.newExactQuery("long-pt", 17L); - final TopDocs topDocs = search(query, 5); - assertEquals(3, topDocs.totalHits.value); - } - - @Test - public void longRangeQuery() throws Exception { - Query query = FDBNumericPoint.newRangeQuery("long-pt", 16L, 18L); - final TopDocs topDocs = search(query, 5); - assertEquals(3, topDocs.totalHits.value); - } - - private Document doc(final String id, final String body) { - final Document result = new Document(); - result.add(new StringField("_id", id, Store.YES)); - result.add(new TextField("body", body, Store.NO)); - - // For sorting - result.add(new DoubleDocValuesField("double-sort", random.nextDouble())); - result.add(new NumericDocValuesField("long-sort", random.nextLong())); - - // For querying - result.add(new FDBNumericPoint("double-pt", 5.5)); - result.add(new FDBNumericPoint("long-pt", 17L)); - - // For retrieval - result.add(new StoredField("float", 123.456f)); - result.add(new StoredField("double", 123.456)); - return result; - } - - private TopDocs search(final Query query, final int count) throws IOException { - return reader.run(DB, () -> { - return searcher.search(query, count); - }); - } - - private TopFieldDocs search(final Query query, final int count, final Sort sort) throws IOException { - return reader.run(DB, () -> { - return searcher.search(query, count, sort); - }); - } - -} diff --git a/src/test/java/com/cloudant/fdblucene/ScoreTest.java b/src/test/java/com/cloudant/fdblucene/ScoreTest.java deleted file mode 100644 index 093894d..0000000 --- a/src/test/java/com/cloudant/fdblucene/ScoreTest.java +++ /dev/null @@ -1,168 +0,0 @@ -/******************************************************************************* - * Copyright 2019 IBM Corporation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *******************************************************************************/ -package com.cloudant.fdblucene; - -import static org.junit.Assert.assertEquals; - -import java.io.IOException; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.TextField; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; -import org.junit.After; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -import com.apple.foundationdb.Database; -import com.apple.foundationdb.FDB; -import com.apple.foundationdb.subspace.Subspace; - -/** - * Tests that demonstrate equivalence with normal Lucene code. - */ -public class ScoreTest { - - private static final Analyzer ANALYZER = new StandardAnalyzer(); - - private static final boolean VERBOSE = false; - - private static Database DB; - private Subspace subspace1; - private Subspace subspace2; - - @BeforeClass - public static void setupFDB() { - FDB.selectAPIVersion(600); - DB = FDB.instance().open(); - } - - @Before - public void setup() throws Exception { - subspace1 = new Subspace(new byte[] { 1 }); - subspace2 = new Subspace(new byte[] { 2 }); - cleanup(); - } - - @After - public void cleanup() throws Exception { - DB.run(txn -> { - txn.clear(subspace1.range()); - txn.clear(subspace2.range()); - return null; - }); - } - - @Test - public void noRepeatedTerm() throws IOException { - final Document doc1 = doc("text", "hello there you"); - compareScores(new TermQuery(new Term("text", "hello")), doc1); - } - - @Test - public void repeatedTerm() throws IOException { - final Document doc1 = doc("text", "hello there you hello"); - compareScores(new TermQuery(new Term("text", "hello")), doc1); - } - - @Test - public void prefix() throws IOException { - final Document doc1 = doc("text", "hello there you hello"); - final Document doc2 = doc("text", "hello there you hello"); - final Document doc3 = doc("text", " there you "); - compareScores(new PrefixQuery(new Term("text", "hell")), doc1, doc2, doc3); - } - - @Test - public void phrase() throws IOException { - final Document doc1 = doc("text", "hello there you hello"); - final Document doc2 = doc("text", "hello there you hello"); - final Document doc3 = doc("text", " there you "); - compareScores(new PhraseQuery("text", "you", "hello"), doc1, doc2, doc3); - } - - private void compareScores(final Query query, final Document... docs) throws IOException { - final TopDocs td1 = search("FDBDirectory", query, indexWithFDBDirectory(docs)); - final TopDocs td2 = search("FDBIndexWriter", query, indexWithFDBIndexWriter(docs)); - assertEquals(td1.totalHits, td2.totalHits); - assertEquals(td1.scoreDocs.length, td2.scoreDocs.length); - for (int i = 0; i < td1.scoreDocs.length; i++) { - assertEquals(td1.scoreDocs[i].score, td2.scoreDocs[i].score, 0.0); - } - } - - private IndexReader indexWithFDBDirectory(final Document... docs) throws IOException { - final FDBDirectory dir = FDBDirectory.open(DB, subspace1, FDBUtil.DEFAULT_PAGE_SIZE, FDBUtil.DEFAULT_TXN_SIZE); - final IndexWriterConfig config = new IndexWriterConfig(ANALYZER); - try (final IndexWriter writer = new IndexWriter(dir, config)) { - for (final Document doc : docs) { - writer.addDocument(doc); - } - writer.commit(); - } - return DirectoryReader.open(dir); - } - - private IndexReader indexWithFDBIndexWriter(final Document... docs) throws IOException { - final FDBIndexWriter writer = new FDBIndexWriter(DB, subspace2, ANALYZER); - for (final Document doc : docs) { - writer.addDocument(doc); - } - return new FDBIndexReader(subspace2); - } - - private TopDocs search(final String prefix, final Query query, final IndexReader reader) throws IOException { - final IndexSearcher searcher = new IndexSearcher(reader); - final TopDocs result; - - if (reader instanceof FDBIndexReader) { - result = ((FDBIndexReader) reader).run(DB, () -> { - return searcher.search(query, 1); - }); - } else { - result = searcher.search(query, 1); - } - - if (VERBOSE) { - for (final ScoreDoc doc : result.scoreDocs) { - System.out.printf("%s\n%s\n", prefix, searcher.explain(query, doc.doc)); - } - } - - return result; - } - - private Document doc(final String field, String text) { - final Document result = new Document(); - result.add(new TextField(field, text, Store.NO)); - return result; - } - -}