diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..e7aa96ea --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +* text=auto +*.java text eol=lf diff --git a/README.md b/README.md index 66f02051..2ae2cd3f 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,6 @@ Recommended Package imports: ```java import org.iq80.leveldb.*; -import static org.iq80.leveldb.impl.Iq80DBFactory.*; import java.io.*; ``` diff --git a/leveldb-api/src/main/java/org/iq80/leveldb/DB.java b/leveldb-api/src/main/java/org/iq80/leveldb/DB.java index cd2e5bd5..f77e07a4 100644 --- a/leveldb-api/src/main/java/org/iq80/leveldb/DB.java +++ b/leveldb-api/src/main/java/org/iq80/leveldb/DB.java @@ -88,10 +88,17 @@ void suspendCompactions() void resumeCompactions(); /** - * Force a compaction of the specified key range. + * Compact the underlying storage for the key range [begin, end]. + * In particular, deleted and overwritten versions are discarded, + * and the data is rearranged to reduce the cost of operations + * needed to access the data. This operation should typically only + * be invoked by users who understand the underlying implementation. + *
<p>
+ * Call to {@code db.compactRange(null, null);} will compact the + * entire database. * * @param begin if null then compaction start from the first key - * @param end if null then compaction ends at the last key + * @param end if null then compaction ends at the last key */ void compactRange(byte[] begin, byte[] end) throws DBException; diff --git a/leveldb-api/src/main/java/org/iq80/leveldb/DBIterator.java b/leveldb-api/src/main/java/org/iq80/leveldb/DBIterator.java index 6a1ab6fa..4a2d0371 100644 --- a/leveldb-api/src/main/java/org/iq80/leveldb/DBIterator.java +++ b/leveldb-api/src/main/java/org/iq80/leveldb/DBIterator.java @@ -62,4 +62,7 @@ public interface DBIterator * Repositions the iterator so it is at the end of of the Database. */ void seekToLast(); + + @Override + void close(); } diff --git a/leveldb-api/src/main/java/org/iq80/leveldb/Options.java b/leveldb-api/src/main/java/org/iq80/leveldb/Options.java index 630753cc..90be4412 100644 --- a/leveldb-api/src/main/java/org/iq80/leveldb/Options.java +++ b/leveldb-api/src/main/java/org/iq80/leveldb/Options.java @@ -33,6 +33,30 @@ public class Options private DBComparator comparator; private Logger logger; private long cacheSize; + private boolean allowMmapReads = true; + private boolean allowMmapWrites = true; + private XFilterPolicy filterPolicy; + + public static Options fromOptions(Options options) + { + final Options options1 = new Options(); + options1.createIfMissing = options.createIfMissing; + options1.errorIfExists = options.errorIfExists; + options1.writeBufferSize = options.writeBufferSize; + options1.maxOpenFiles = options.maxOpenFiles; + options1.blockRestartInterval = options.blockRestartInterval; + options1.blockSize = options.blockSize; + options1.compressionType = options.compressionType; + options1.verifyChecksums = options.verifyChecksums; + options1.paranoidChecks = options.paranoidChecks; + options1.comparator = options.comparator; + options1.logger = options.logger; + options1.cacheSize = options.cacheSize; + options1.allowMmapReads = options.allowMmapReads; + options1.allowMmapWrites = options.allowMmapWrites; + options1.filterPolicy = options.filterPolicy; + return options1; + } static void checkArgNotNull(Object value, String name) { @@ -173,4 +197,43 @@ public Options paranoidChecks(boolean paranoidChecks) this.paranoidChecks = paranoidChecks; return this; } + + public Options allowMmapReads(boolean allowMmapReads) + { + this.allowMmapReads = allowMmapReads; + return this; + } + + public boolean allowMmapReads() + { + return allowMmapReads; + } + + public Options allowMmapWrites(boolean allowMmapWrites) + { + this.allowMmapWrites = allowMmapWrites; + return this; + } + + public boolean allowMmapWrites() + { + return allowMmapWrites; + } + + /** + * Set table filter policy + * + * @param filterPolicy new filter policy + * @return self + */ + public Options filterPolicy(XFilterPolicy filterPolicy) + { + this.filterPolicy = filterPolicy; + return this; + } + + public XFilterPolicy filterPolicy() + { + return filterPolicy; + } } diff --git a/leveldb-api/src/main/java/org/iq80/leveldb/WriteOptions.java b/leveldb-api/src/main/java/org/iq80/leveldb/WriteOptions.java index 819c334e..d3eeddd8 100644 --- a/leveldb-api/src/main/java/org/iq80/leveldb/WriteOptions.java +++ b/leveldb-api/src/main/java/org/iq80/leveldb/WriteOptions.java @@ -22,6 +22,25 @@ public class WriteOptions private boolean sync; private boolean snapshot; + /** + * If true, the write will be flushed from the operating system + * buffer cache (by 
calling WritableFile::Sync()) before the write + * is considered complete. If this flag is true, writes will be + * slower. + *
<p>
+ * If this flag is false, and the machine crashes, some recent + * writes may be lost. Note that if it is just the process that + * crashes (i.e., the machine does not reboot), no writes will be + * lost even if sync==false. + *
<p>
+ * In other words, a DB write with sync==false has similar + * crash semantics as the "write()" system call. A DB write + * with sync==true has similar crash semantics to a "write()" + * system call followed by "fsync()". + *
<p>
+ * The same holds in this Java implementation: if just the process crashes (and the machine stays up), no writes are lost even when sync==false. + * Default: false + */ public boolean sync() { return sync; diff --git a/leveldb-api/src/main/java/org/iq80/leveldb/XFilterPolicy.java b/leveldb-api/src/main/java/org/iq80/leveldb/XFilterPolicy.java new file mode 100644 index 00000000..67de8ecb --- /dev/null +++ b/leveldb-api/src/main/java/org/iq80/leveldb/XFilterPolicy.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb; + +/** + * A database can be configured with a custom FilterPolicy object. + * This object is responsible for creating a small filter from a set + * of keys. These filters are stored in leveldb and are consulted + * automatically by leveldb to decide whether or not to read some + * information from disk. In many cases, a filter can cut down the + * number of disk seeks from a handful to a single disk seek per + * DB::Get() call. + *
<p>
+ * Most people will want to use the builtin bloom filter support (see + * NewBloomFilterPolicy() below). + * + * @author Honore Vasconcelos + */ +public interface XFilterPolicy +{ +} diff --git a/leveldb-benchmark/pom.xml b/leveldb-benchmark/pom.xml index cb05f04a..a2ffbe3a 100644 --- a/leveldb-benchmark/pom.xml +++ b/leveldb-benchmark/pom.xml @@ -30,6 +30,7 @@ ${project.parent.basedir} + false diff --git a/leveldb-benchmark/src/main/java/org/iq80/leveldb/benchmark/DbBenchmark.java b/leveldb-benchmark/src/main/java/org/iq80/leveldb/benchmark/DbBenchmark.java index 7959c9c1..d415fad9 100644 --- a/leveldb-benchmark/src/main/java/org/iq80/leveldb/benchmark/DbBenchmark.java +++ b/leveldb-benchmark/src/main/java/org/iq80/leveldb/benchmark/DbBenchmark.java @@ -26,9 +26,10 @@ import org.iq80.leveldb.DBFactory; import org.iq80.leveldb.DBIterator; import org.iq80.leveldb.Options; +import org.iq80.leveldb.ReadOptions; import org.iq80.leveldb.WriteBatch; import org.iq80.leveldb.WriteOptions; -import org.iq80.leveldb.impl.DbImpl; +import org.iq80.leveldb.table.BloomFilterPolicy; import org.iq80.leveldb.util.Closeables; import org.iq80.leveldb.util.FileUtils; import org.iq80.leveldb.util.PureJavaCrc32C; @@ -39,21 +40,20 @@ import java.io.File; import java.io.IOException; +import java.lang.reflect.Method; import java.nio.ByteBuffer; +import java.util.Arrays; import java.util.Date; import java.util.EnumMap; import java.util.List; import java.util.Map; import java.util.Random; import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; import static com.google.common.base.Preconditions.checkArgument; import static java.nio.charset.StandardCharsets.UTF_8; -import static org.iq80.leveldb.benchmark.DbBenchmark.DBState.EXISTING; -import static org.iq80.leveldb.benchmark.DbBenchmark.DBState.FRESH; -import static org.iq80.leveldb.benchmark.DbBenchmark.Order.RANDOM; -import static org.iq80.leveldb.benchmark.DbBenchmark.Order.SEQUENTIAL; -import static org.iq80.leveldb.impl.DbConstants.NUM_LEVELS; public class DbBenchmark { @@ -61,38 +61,18 @@ public class DbBenchmark private final Integer writeBufferSize; private final File databaseDir; private final double compressionRatio; - private long startTime; - - enum Order - { - SEQUENTIAL, - RANDOM - } - - enum DBState - { - FRESH, - EXISTING - } + private final Map flags; // Cache cache_; private final List benchmarks; + private final int blockCacheSize; + private final int bloomFilterBits; private DB db; - private final int num; + private int num; private int reads; - private final int valueSize; - private int heapCounter; - private double lastOpFinish; - private long bytes; - private String message; - private String postMessage; - // private Histogram hist_; - private final RandomGenerator generator; - private final Random random; - - // State kept for progress messages - private int done; - private int nextReport; // When to report next + private int valueSize; + private WriteOptions writeOptions; + private int entriesPerBatch; private final DBFactory factory; @@ -101,16 +81,18 @@ public DbBenchmark(Map flags) { ClassLoader cl = DbBenchmark.class.getClassLoader(); factory = (DBFactory) cl.loadClass(System.getProperty("leveldb.factory", "org.iq80.leveldb.impl.Iq80DBFactory")).newInstance(); + this.flags = flags; benchmarks = (List) flags.get(Flag.benchmarks); - num = (Integer) flags.get(Flag.num); - reads = (Integer) (flags.get(Flag.reads) == null ? 
flags.get(Flag.num) : flags.get(Flag.reads)); - valueSize = (Integer) flags.get(Flag.value_size); + writeBufferSize = (Integer) flags.get(Flag.write_buffer_size); compressionRatio = (Double) flags.get(Flag.compression_ratio); useExisting = (Boolean) flags.get(Flag.use_existing_db); - heapCounter = 0; - bytes = 0; - random = new Random(301); + blockCacheSize = (Integer) flags.get(Flag.cache_size); + bloomFilterBits = (Integer) flags.get(Flag.bloom_bits); + num = (Integer) flags.get(Flag.num); + reads = (Integer) (flags.get(Flag.reads) == null ? flags.get(Flag.num) : flags.get(Flag.reads)); + valueSize = (Integer) flags.get(Flag.value_size); + entriesPerBatch = 1; databaseDir = new File((String) flags.get(Flag.db)); @@ -124,8 +106,6 @@ public DbBenchmark(Map flags) if (!useExisting) { destroyDb(); } - - generator = new RandomGenerator(compressionRatio); } private void run() @@ -135,92 +115,216 @@ private void run() open(); for (String benchmark : benchmarks) { - start(); + // Reset parameters that may be overridden below + num = (Integer) flags.get(Flag.num); + reads = (Integer) (flags.get(Flag.reads) == null ? flags.get(Flag.num) : flags.get(Flag.reads)); + valueSize = (Integer) flags.get(Flag.value_size); + entriesPerBatch = 1; + writeOptions = new WriteOptions(); - boolean known = true; + boolean freshBb = false; + int numThreads = (Integer) flags.get(Flag.threads); + + String method = null; if (benchmark.equals("fillseq")) { - write(new WriteOptions(), SEQUENTIAL, FRESH, num, valueSize, 1); + freshBb = true; + method = "writeSeq"; } else if (benchmark.equals("fillbatch")) { - write(new WriteOptions(), SEQUENTIAL, FRESH, num, valueSize, 1000); + freshBb = true; + entriesPerBatch = 1000; + method = "writeSeq"; } else if (benchmark.equals("fillrandom")) { - write(new WriteOptions(), RANDOM, FRESH, num, valueSize, 1); + freshBb = true; + method = "writeRandom"; } else if (benchmark.equals("overwrite")) { - write(new WriteOptions(), RANDOM, EXISTING, num, valueSize, 1); + freshBb = false; + method = "writeRandom"; } else if (benchmark.equals("fillsync")) { - write(new WriteOptions().sync(true), RANDOM, FRESH, num / 1000, valueSize, 1); + freshBb = true; + num /= 1000; + writeOptions.sync(true); + method = "writeRandom"; } else if (benchmark.equals("fill100K")) { - write(new WriteOptions(), RANDOM, FRESH, num / 1000, 100 * 1000, 1); + freshBb = true; + num /= 1000; + valueSize = 100 * 1000; + method = "writeRandom"; } else if (benchmark.equals("readseq")) { - readSequential(); + method = "readSequential"; } else if (benchmark.equals("readreverse")) { - readReverse(); + method = "readReverse"; } else if (benchmark.equals("readrandom")) { - readRandom(); + method = "readRandom"; + } + else if (benchmark.equals("seekrandom")) { + method = "seekRandom"; } else if (benchmark.equals("readhot")) { - readHot(); + method = "readHot"; } else if (benchmark.equals("readrandomsmall")) { - int n = reads; reads /= 1000; - readRandom(); - reads = n; + method = "readRandom"; + } + else if (benchmark.equals("readwhilewriting")) { + numThreads++; // Add extra thread for writing + method = "readWhileWriting"; } else if (benchmark.equals("compact")) { - compact(); + method = "compact"; } else if (benchmark.equals("crc32c")) { - crc32c(4096, "(4k per op)"); + method = "crc32c"; } else if (benchmark.equals("acquireload")) { - acquireLoad(); + method = "acquireLoad"; } else if (benchmark.equals("snappycomp")) { if (Snappy.available()) { - snappyCompress(); + method = "snappyCompress"; } } else if 
(benchmark.equals("snappyuncomp")) { if (Snappy.available()) { - snappyUncompressDirectBuffer(); + method = "snappyUncompressDirectBuffer"; } } else if (benchmark.equals("unsnap-array")) { if (Snappy.available()) { - snappyUncompressArray(); + method = "snappyUncompressArray"; } } else if (benchmark.equals("unsnap-direct")) { if (Snappy.available()) { - snappyUncompressDirectBuffer(); + method = "snappyUncompressDirectBuffer"; } } else if (benchmark.equals("heapprofile")) { heapProfile(); } else if (benchmark.equals("stats")) { - printStats(); + printStats("leveldb.stats"); } else { - known = false; System.err.println("Unknown benchmark: " + benchmark); } - if (known) { - stop(benchmark); + if (freshBb) { + if (useExisting) { + System.out.println("skipping (--use_existing_db is true)"); + return; + } + db.close(); + db = null; + destroyDb(); + open(); + } + if (method != null) { + try { + runBenchmark(numThreads, benchmark, method); + } + catch (Exception e) { + System.out.println("Failed to run " + method); + e.printStackTrace(); + return; + } } + } db.close(); } + private void runBenchmark(int n, String name, String method) throws Exception + { + SharedState shared = new SharedState(); + + ThreadArg[] arg = new ThreadArg[n]; + for (int i = 0; i < arg.length; ++i) { + arg[i] = new ThreadArg(); + arg[i].bm = this; + arg[i].method = method; + arg[i].shared = shared; + arg[i].thread = new ThreadState(i); + arg[i].thread.shared = shared; + startThread(arg[i]); + } + + shared.mu.lock(); + while (shared.numInitialized < n) { + shared.cv.await(); + } + + shared.start = true; + shared.cv.signalAll(); + while (shared.numDone < n) { + shared.cv.await(); + } + shared.mu.unlock(); + + for (int i = 1; i < n; i++) { + arg[0].thread.stats.merge(arg[i].thread.stats); + } + arg[0].thread.stats.report(name); + } + + public void startThread(final ThreadArg arg) + { + new Thread(new Runnable() + { + @Override + public void run() + { + SharedState shared = arg.shared; + ThreadState thread = arg.thread; + shared.mu.lock(); + try { + shared.numInitialized++; + if (shared.numInitialized >= shared.total) { + shared.cv.signalAll(); + } + while (!shared.start) { + shared.cv.awaitUninterruptibly(); + } + } + finally { + shared.mu.unlock(); + } + try { + Method method = arg.bm.getClass().getDeclaredMethod(arg.method, ThreadState.class); + method.setAccessible(true); + thread.stats.start(); + method.invoke(arg.bm, thread); + } + catch (Exception e) { + thread.stats.addMessage("ERROR " + e); + e.printStackTrace(); + } + finally { + thread.stats.stop(); + } + + shared.mu.lock(); + try { + shared.numDone++; + if (shared.numDone >= shared.total) { + shared.cv.signalAll(); + } + } + finally { + shared.mu.unlock(); + } + } + }).start(); + } + private void printHeader() throws IOException { @@ -240,10 +344,12 @@ private void printHeader() System.out.printf("------------------------------------------------\n"); } + @SuppressWarnings({"InnerAssignment"}) static void printWarnings() { - boolean assertsEnabled = true; - assert assertsEnabled; // Intentional side effect!!! + boolean assertsEnabled = false; + // CHECKSTYLE IGNORE check FOR NEXT 1 LINES + assert assertsEnabled = true; // Intentional side effect!!!
if (assertsEnabled) { System.out.printf("WARNING: Assertions are enabled; benchmarks unnecessarily slow\n"); } @@ -302,95 +408,41 @@ private void open() { Options options = new Options(); options.createIfMissing(!useExisting); - // todo block cache + if (blockCacheSize >= 0) { + options.cacheSize(blockCacheSize); + } + if (bloomFilterBits >= 0) { + options.filterPolicy(new BloomFilterPolicy(bloomFilterBits)); + } + options.cacheSize(blockCacheSize); if (writeBufferSize != null) { options.writeBufferSize(writeBufferSize); } db = factory.open(databaseDir, options); } - private void start() - { - startTime = System.nanoTime(); - bytes = 0; - message = null; - lastOpFinish = startTime; - // hist.clear(); - done = 0; - nextReport = 100; - } - - private void stop(String benchmark) - { - long endTime = System.nanoTime(); - double elapsedSeconds = 1.0d * (endTime - startTime) / TimeUnit.SECONDS.toNanos(1); - - // Pretend at least one op was done in case we are running a benchmark - // that does nto call FinishedSingleOp(). - if (done < 1) { - done = 1; - } - - if (bytes > 0) { - String rate = String.format("%6.1f MB/s", (bytes / 1048576.0) / elapsedSeconds); - if (message != null) { - message = rate + " " + message; - } - else { - message = rate; - } - } - else if (message == null) { - message = ""; - } - - System.out.printf("%-12s : %11.5f micros/op;%s%s\n", - benchmark, - elapsedSeconds * 1.0e6 / done, - (message == null ? "" : " "), - message); -// if (FLAGS_histogram) { -// System.out.printf("Microseconds per op:\n%s\n", hist_.ToString().c_str()); -// } - - if (postMessage != null) { - System.out.printf("\n%s\n", postMessage); - postMessage = null; - } - - } - - private void write(WriteOptions writeOptions, Order order, DBState state, int numEntries, int valueSize, int entriesPerBatch) + private void write(ThreadState thread, boolean seq) throws IOException { - if (state == FRESH) { - if (useExisting) { - message = "skipping (--use_existing_db is true)"; - return; - } - db.close(); - db = null; - destroyDb(); - open(); - start(); // Do not count time taken to destroy/open + if (!flags.get(Flag.num).equals(num)) { + thread.stats.addMessage(String.format("(%d ops)", num)); } - if (numEntries != num) { - message = String.format("(%d ops)", numEntries); - } - - for (int i = 0; i < numEntries; i += entriesPerBatch) { + RandomGenerator gen = newGenerator(); + long bytes = 0; + for (int i = 0; i < num; i += entriesPerBatch) { WriteBatch batch = db.createWriteBatch(); for (int j = 0; j < entriesPerBatch; j++) { - int k = (order == SEQUENTIAL) ? i + j : random.nextInt(num); + int k = seq ? 
i + j : thread.rand.nextInt(num); byte[] key = formatNumber(k); - batch.put(key, generator.generate(valueSize)); + batch.put(key, gen.generate(valueSize)); bytes += valueSize + key.length; - finishedSingleOp(); + thread.stats.finishedSingleOp(); } db.write(batch, writeOptions); batch.close(); } + thread.stats.addBytes(bytes); } public static byte[] formatNumber(long n) @@ -410,100 +462,123 @@ public static byte[] formatNumber(long n) return slice; } - private void finishedSingleOp() - { -// if (histogram) { -// todo -// } - done++; - if (done >= nextReport) { - if (nextReport < 1000) { - nextReport += 100; - } - else if (nextReport < 5000) { - nextReport += 500; - } - else if (nextReport < 10000) { - nextReport += 1000; - } - else if (nextReport < 50000) { - nextReport += 5000; - } - else if (nextReport < 100000) { - nextReport += 10000; - } - else if (nextReport < 500000) { - nextReport += 50000; - } - else { - nextReport += 100000; - } - System.out.printf("... finished %d ops%30s\r", done, ""); - - } - } - - private void readSequential() + private void readSequential(ThreadState thread) { + long bytes = 0; for (int loops = 0; loops < 5; loops++) { DBIterator iterator = db.iterator(); for (int i = 0; i < reads && iterator.hasNext(); i++) { Map.Entry entry = iterator.next(); bytes += entry.getKey().length + entry.getValue().length; - finishedSingleOp(); + thread.stats.finishedSingleOp(); } Closeables.closeQuietly(iterator); } + thread.stats.addBytes(bytes); } - private void readReverse() + private void readReverse(ThreadState thread) { //To change body of created methods use File | Settings | File Templates. } - private void readRandom() + private void readRandom(ThreadState thread) { + int found = 0; for (int i = 0; i < reads; i++) { - byte[] key = formatNumber(random.nextInt(num)); + byte[] key = formatNumber(thread.rand.nextInt(num)); byte[] value = db.get(key); - if (value == null) { - throw new NullPointerException(String.format("db.get(%s) is null", new String(key, UTF_8))); + if (value != null) { + found++; } - bytes += key.length + value.length; - finishedSingleOp(); + thread.stats.finishedSingleOp(); + } + thread.stats.addMessage(String.format("(%d of %d found)", found, num)); + } + + private void readMissing(ThreadState thread) + { + + for (int i = 0; i < reads; i++) { + byte[] key = formatNumber(thread.rand.nextInt(num)); + db.get(key); + thread.stats.finishedSingleOp(); + } } - private void readHot() + private void readHot(ThreadState thread) { + long bytes = 0; int range = (num + 99) / 100; for (int i = 0; i < reads; i++) { - byte[] key = formatNumber(random.nextInt(range)); + byte[] key = formatNumber(thread.rand.nextInt(range)); byte[] value = db.get(key); bytes += key.length + value.length; - finishedSingleOp(); + thread.stats.finishedSingleOp(); } + thread.stats.addBytes(bytes); } - private void compact() - throws IOException + private void seekRandom(ThreadState thread) throws IOException { - if (db instanceof DbImpl) { - ((DbImpl) db).compactMemTable(); - for (int level = 0; level < NUM_LEVELS - 1; level++) { - ((DbImpl) db).compactRange(level, Slices.copiedBuffer("", UTF_8), Slices.copiedBuffer("~", UTF_8)); + ReadOptions options = new ReadOptions(); + int found = 0; + for (int i = 0; i < reads; i++) { + DBIterator iter = db.iterator(options); + byte[] key = formatNumber(thread.rand.nextInt(num)); + iter.seek(key); + if (iter.hasNext() && Arrays.equals(iter.next().getKey(), key)) { + found++; } + iter.close(); + thread.stats.finishedSingleOp(); }
thread.stats.addMessage(String.format("(%d of %d found)", found, num)); } - private void crc32c(int blockSize, String message) + private void readWhileWriting(ThreadState thread) { - // Checksum about 500MB of data total - byte[] data = new byte[blockSize]; - for (int i = 0; i < data.length; i++) { - data[i] = 'x'; + if (thread.tid > 0) { + readRandom(thread); + } + else { + // Special thread that keeps writing until other threads are done. + RandomGenerator gen = newGenerator(); + while (true) { + thread.shared.mu.lock(); + try { + if (thread.shared.numDone + 1 >= thread.shared.numInitialized) { + // Other threads have finished + break; + } + } + finally { + thread.shared.mu.unlock(); + } + byte[] key = formatNumber(thread.rand.nextInt((Integer) flags.get(Flag.num))); + db.put(key, gen.generate(valueSize), writeOptions); + } + + // Do not count any of the preceding work/delay in stats. + thread.stats.start(); } + } + + private void compact(ThreadState thread) + throws IOException + { + db.compactRange(null, null); + } + + private void crc32c(final ThreadState thread) + { + // Checksum about 500MB of data total + int blockSize = 4096; + String label = "(4K per op)"; + // Checksum about 500MB of data total + byte[] data = new byte[blockSize]; + Arrays.fill(data, (byte) 'x'); long bytes = 0; int crc = 0; @@ -511,26 +586,27 @@ private void crc32c(int blockSize, String message) PureJavaCrc32C checksum = new PureJavaCrc32C(); checksum.update(data, 0, blockSize); crc = checksum.getMaskedValue(); - finishedSingleOp(); + thread.stats.finishedSingleOp(); bytes += blockSize; } + // Print so result is not dead System.out.printf("... crc=0x%x\r", crc); - this.bytes = bytes; - // Print so result is not dead - this.message = message; + thread.stats.addBytes(bytes); + thread.stats.addMessage(label); } - private void acquireLoad() + private void acquireLoad(ThreadState thread) { //To change body of created methods use File | Settings | File Templates. 
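// Port note (editor's assumption): in the C++ db_bench, "acquireload" times repeated atomic acquire-loads; it remains a stub in this Java port.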
} - private void snappyCompress() + private void snappyCompress(ThreadState thread) { - byte[] raw = generator.generate(new Options().blockSize()); + byte[] raw = newGenerator().generate(new Options().blockSize()); byte[] compressedOutput = new byte[Snappy.maxCompressedLength(raw.length)]; + long bytes = 0; long produced = 0; // attempt to compress the block @@ -541,20 +617,27 @@ private void snappyCompress() produced += compressedSize; } catch (IOException ignored) { + thread.stats.addMessage("(snappy failure)"); throw Throwables.propagate(ignored); } - finishedSingleOp(); + thread.stats.finishedSingleOp(); } + thread.stats.addMessage(String.format("(output: %.1f%%)", (produced * 100.0) / bytes)); + thread.stats.addBytes(bytes); + } - message = String.format("(output: %.1f%%)", (produced * 100.0) / bytes); + private RandomGenerator newGenerator() + { + return new RandomGenerator(compressionRatio); } - private void snappyUncompressArray() + private void snappyUncompressArray(ThreadState thread) { int inputSize = new Options().blockSize(); byte[] compressedOutput = new byte[Snappy.maxCompressedLength(inputSize)]; - byte[] raw = generator.generate(inputSize); + byte[] raw = newGenerator().generate(inputSize); + long bytes = 0; int compressedLength; try { compressedLength = Snappy.compress(raw, 0, raw.length, compressedOutput, 0); @@ -569,18 +652,20 @@ private void snappyUncompressArray() bytes += inputSize; } catch (IOException ignored) { + thread.stats.addMessage("(snappy failure)"); throw Throwables.propagate(ignored); } - finishedSingleOp(); + thread.stats.finishedSingleOp(); } + thread.stats.addBytes(bytes); } - private void snappyUncompressDirectBuffer() + private void snappyUncompressDirectBuffer(ThreadState thread) { int inputSize = new Options().blockSize(); byte[] compressedOutput = new byte[Snappy.maxCompressedLength(inputSize)]; - byte[] raw = generator.generate(inputSize); + byte[] raw = newGenerator().generate(inputSize); int compressedLength; try { compressedLength = Snappy.compress(raw, 0, raw.length, compressedOutput, 0); @@ -593,6 +678,7 @@ private void snappyUncompressDirectBuffer() ByteBuffer compressedBuffer = ByteBuffer.allocateDirect(compressedLength); compressedBuffer.put(compressedOutput, 0, compressedLength); + long bytes = 0; // attempt to uncompress the block while (bytes < 5L * 1024 * 1048576) { // Compress 1G try { @@ -603,13 +689,25 @@ private void snappyUncompressDirectBuffer() bytes += inputSize; } catch (IOException ignored) { + thread.stats.addMessage("(snappy failure)"); throw Throwables.propagate(ignored); } - finishedSingleOp(); + thread.stats.finishedSingleOp(); + thread.stats.addBytes(bytes); } } + private void writeSeq(ThreadState thread) throws IOException + { + write(thread, true); + } + + private void writeRandom(ThreadState thread) throws IOException + { + write(thread, false); + } + private void heapProfile() { //To change body of created methods use File | Settings | File Templates. @@ -622,8 +720,12 @@ private void destroyDb() FileUtils.deleteRecursively(databaseDir); } - private void printStats() + private void printStats(String name) { + final String property = db.getProperty(name); + if (property != null) { + System.out.print(property); + } //To change body of created methods use File | Settings | File Templates. 
} @@ -677,13 +779,10 @@ private enum Flag // stats -- Print DB stats // heapprofile -- Dump a heap profile (if supported by this port) benchmarks(ImmutableList.of( - "fillseq", - "fillseq", "fillseq", "fillsync", "fillrandom", "overwrite", - "fillseq", "readrandom", "readrandom", // Extra run to allow previous compactions to quiesce "readseq", @@ -696,121 +795,130 @@ private enum Flag // "crc32c", "snappycomp", "unsnap-array", - "unsnap-direct" + "unsnap-direct", + "stats" // "acquireload" - )) - { - @Override - public Object parseValue(String value) - { - return ImmutableList.copyOf(Splitter.on(",").trimResults().omitEmptyStrings().split(value)); - } - }, + )) { + @Override + public Object parseValue(String value) + { + return ImmutableList.copyOf(Splitter.on(",").trimResults().omitEmptyStrings().split(value)); + } + }, // Arrange to generate values that shrink to this fraction of // their original size after compression - compression_ratio(0.5d) - { - @Override - public Object parseValue(String value) - { - return Double.parseDouble(value); - } - }, + compression_ratio(0.5d) { + @Override + public Object parseValue(String value) + { + return Double.parseDouble(value); + } + }, // Print histogram of operation timings - histogram(false) - { - @Override - public Object parseValue(String value) - { - return Boolean.parseBoolean(value); - } - }, + histogram(false) { + @Override + public Object parseValue(String value) + { + return Boolean.parseBoolean(value); + } + }, // If true, do not destroy the existing database. If you set this // flag and also specify a benchmark that wants a fresh database, that // benchmark will fail. - use_existing_db(false) - { - @Override - public Object parseValue(String value) - { - return Boolean.parseBoolean(value); - } - }, + use_existing_db(false) { + @Override + public Object parseValue(String value) + { + return Boolean.parseBoolean(value); + } + }, // Number of key/values to place in database - num(1000000) - { - @Override - public Object parseValue(String value) - { - return Integer.parseInt(value); - } - }, + num(1000000) { + @Override + public Object parseValue(String value) + { + return Integer.parseInt(value); + } + }, // Number of read operations to do. If negative, do FLAGS_num reads. - reads(null) - { - @Override - public Object parseValue(String value) - { - return Integer.parseInt(value); - } - }, + reads(null) { + @Override + public Object parseValue(String value) + { + return Integer.parseInt(value); + } + }, + + // Number of concurrent threads to run. + threads(1) { + @Override + public Object parseValue(String value) + { + return Integer.parseInt(value); + } + }, // Size of each value - value_size(100) - { - @Override - public Object parseValue(String value) - { - return Integer.parseInt(value); - } - }, + value_size(100) { + @Override + public Object parseValue(String value) + { + return Integer.parseInt(value); + } + }, // Number of bytes to buffer in memtable before compacting // (initialized to default value by "main") - write_buffer_size(null) - { - @Override - public Object parseValue(String value) - { - return Integer.parseInt(value); - } - }, + write_buffer_size(null) { + @Override + public Object parseValue(String value) + { + return Integer.parseInt(value); + } + }, // Number of bytes to use as a cache of uncompressed data. // Negative means use default settings. 
- cache_size(-1) - { - @Override - public Object parseValue(String value) - { - return Integer.parseInt(value); - } - }, + cache_size(-1) { + @Override + public Object parseValue(String value) + { + return Integer.parseInt(value); + } + }, + + // Bloom filter bits per key. + // Negative means use default settings. + bloom_bits(-1) { + @Override + public Object parseValue(String value) + { + return Integer.parseInt(value); + } + }, // Maximum number of files to keep open at the same time (use default if == 0) - open_files(0) - { - @Override - public Object parseValue(String value) - { - return Integer.parseInt(value); - } - }, + open_files(0) { + @Override + public Object parseValue(String value) + { + return Integer.parseInt(value); + } + }, // Use the db with the following name. - db("/tmp/dbbench") - { - @Override - public Object parseValue(String value) - { - return value; - } - }; + db("/tmp/dbbench") { + @Override + public Object parseValue(String value) + { + return value; + } + }; private final Object defaultValue; @@ -885,4 +993,180 @@ private static Slice generateRandomSlice(Random random, int length) } return rawData; } + + private static class SharedState + { + ReentrantLock mu; + Condition cv; + int total; + + // Each thread goes through the following states: + // (1) initializing + // (2) waiting for others to be initialized + // (3) running + // (4) done + int numInitialized; + int numDone; + boolean start; + + public SharedState() + { + this.mu = new ReentrantLock(); + this.cv = mu.newCondition(); + } + } + + private class ThreadState + { + int tid; // 0..n-1 when running in n threads + Random rand; // Has different seeds for different threads + DbBenchmark.Stats stats = new Stats(); + SharedState shared; + + public ThreadState(int index) + { + this.tid = index; + this.rand = new Random(1000 + index); + } + } + + private class ThreadArg + { + DbBenchmark bm; + SharedState shared; + ThreadState thread; + String method; + } + + private class Stats + { + long start; + long finish; + double seconds; + int done; + int nextReport; + long bytes; + double lastOpFinish; + Histogram hist = new Histogram(); + StringBuilder message = new StringBuilder(); + + public Stats() + { + start(); + } + + void start() + { + nextReport = 100; + lastOpFinish = start; + hist.clear(); + done = 0; + bytes = 0; + seconds = 0; + start = System.nanoTime(); + finish = start; + message.setLength(0); + } + + void merge(Stats other) + { + hist.merge(other.hist); + done += other.done; + bytes += other.bytes; + seconds += other.seconds; + if (other.start < start) { + start = other.start; + } + if (other.finish > finish) { + finish = other.finish; + } + + // Just keep the messages from one thread + if (message.length() == 0) { + message = other.message; + } + } + + void stop() + { + finish = System.nanoTime(); + seconds = 1.0d * (finish - start) / TimeUnit.SECONDS.toNanos(1); + } + + void addMessage(String msg) + { + if (message.length() != 0) { + message.append(" "); + } + message.append(msg); + } + + void finishedSingleOp() + { + if (flags.containsKey(Flag.histogram)) { + double now = System.nanoTime(); + double micros = (now - lastOpFinish) / 1000.0d; + hist.add(micros); + if (micros > 20000) { + System.out.printf("long op: %.1f micros%30s\r", micros, ""); + } + lastOpFinish = now; + } + + done++; + if (done >= nextReport) { + if (nextReport < 1000) { + nextReport += 100; + } + else if (nextReport < 5000) { + nextReport += 500; + } + else if (nextReport < 10000) { + nextReport += 1000; + } + else if 
(nextReport < 50000) { + nextReport += 5000; + } + else if (nextReport < 100000) { + nextReport += 10000; + } + else if (nextReport < 500000) { + nextReport += 50000; + } + else { + nextReport += 100000; + } + System.out.printf("... finished %d ops%30s\r", done, ""); + } + } + + void addBytes(long n) + { + bytes += n; + } + + void report(String name) + { + + // Pretend at least one op was done in case we are running a benchmark + // that does not call FinishedSingleOp(). + if (done < 1) { + done = 1; + } + + if (bytes > 0) { + String rate = String.format("%6.1f MB/s", (bytes / 1048576.0) / seconds); + message.insert(0, " ").insert(0, rate); + } + + System.out.printf("%-12s : %11.5f micros/op;%s%s\n", + name, + seconds * 1.0e6 / done, + (message == null ? "" : " "), + message); + if (flags.get(Flag.histogram).equals(true)) { + System.out.printf("Microseconds per op:\n%s\n", hist.toString()); + } + } + } } diff --git a/leveldb-benchmark/src/main/java/org/iq80/leveldb/benchmark/Histogram.java b/leveldb-benchmark/src/main/java/org/iq80/leveldb/benchmark/Histogram.java new file mode 100644 index 00000000..d443524f --- /dev/null +++ b/leveldb-benchmark/src/main/java/org/iq80/leveldb/benchmark/Histogram.java @@ -0,0 +1,178 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.iq80.leveldb.benchmark; + +import com.google.common.base.Strings; + +public class Histogram +{ + static final double[] K_BUCKET_LIMIT = { + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16, 18, 20, 25, 30, 35, 40, 45, + 50, 60, 70, 80, 90, 100, 120, 140, 160, 180, 200, 250, 300, 350, 400, 450, + 500, 600, 700, 800, 900, 1000, 1200, 1400, 1600, 1800, 2000, 2500, 3000, + 3500, 4000, 4500, 5000, 6000, 7000, 8000, 9000, 10000, 12000, 14000, + 16000, 18000, 20000, 25000, 30000, 35000, 40000, 45000, 50000, 60000, + 70000, 80000, 90000, 100000, 120000, 140000, 160000, 180000, 200000, + 250000, 300000, 350000, 400000, 450000, 500000, 600000, 700000, 800000, + 900000, 1000000, 1200000, 1400000, 1600000, 1800000, 2000000, 2500000, + 3000000, 3500000, 4000000, 4500000, 5000000, 6000000, 7000000, 8000000, + 9000000, 10000000, 12000000, 14000000, 16000000, 18000000, 20000000, + 25000000, 30000000, 35000000, 40000000, 45000000, 50000000, 60000000, + 70000000, 80000000, 90000000, 100000000, 120000000, 140000000, 160000000, + 180000000, 200000000, 250000000, 300000000, 350000000, 400000000, + 450000000, 500000000, 600000000, 700000000, 800000000, 900000000, + 1000000000, 1200000000, 1400000000, 1600000000, 1800000000, 2000000000, + 2500000000.0, 3000000000.0, 3500000000.0, 4000000000.0, 4500000000.0, + 5000000000.0, 6000000000.0, 7000000000.0, 8000000000.0, 9000000000.0, + 1e200, + }; + private final int kNumBuckets = 154; + private double min; + private double max; + private double num; + private double sum; + private double sumSquares; + + private double[] doubles = new double[kNumBuckets]; + + public void clear() + { + min = K_BUCKET_LIMIT[kNumBuckets - 1]; + max = 0; + num = 0; + sum = 0; + sumSquares = 0; + for (int i = 0; i < kNumBuckets; i++) { + doubles[i] = 0; + } + } + + public void add(double value) + { + // Linear search is fast enough for our usage in db_bench + int b = 0; + while (b < kNumBuckets - 1 && K_BUCKET_LIMIT[b] <= value) { + b++; + } + doubles[b] += 1.0; + if (min > value) { + min = value; + } + if (max < value) { + max = value; + } + num++; + sum += value; + sumSquares += (value * value); + } + + public void merge(Histogram other) + { + if (other.min < min) { + min = other.min; + } + if (other.max > max) { + max = other.max; + } + num += other.num; + sum += other.sum; + sumSquares += other.sumSquares; + for (int b = 0; b < kNumBuckets; b++) { + doubles[b] += other.doubles[b]; + } + } + + public double median() + { + return percentile(50.0); + } + + public double percentile(double p) + { + double threshold = num * (p / 100.0); + double sum = 0; + for (int b = 0; b < kNumBuckets; b++) { + sum += doubles[b]; + if (sum >= threshold) { + // Scale linearly within this bucket + double leftPoint = (b == 0) ? 
0 : K_BUCKET_LIMIT[b - 1]; + double rightPoint = K_BUCKET_LIMIT[b]; + double leftSum = sum - doubles[b]; + double rightSum = sum; + double pos = (threshold - leftSum) / (rightSum - leftSum); + double r = leftPoint + (rightPoint - leftPoint) * pos; + if (r < min) { + r = min; + } + if (r > max) { + r = max; + } + return r; + } + } + return max; + } + + public double average() + { + if (num == 0.0) { + return 0; + } + return sum / num; + } + + public double standardDeviation() + { + if (num == 0.0) { + return 0; + } + double variance = (sumSquares * num - sum * sum) / (num * num); + return Math.sqrt(variance); + } + + public String toString() + { + StringBuilder r = new StringBuilder(); + r.append(String.format("Count: %.0f Average: %.4f StdDev: %.2f\n", + num, average(), standardDeviation())); + r.append(String.format("Min: %.4f Median: %.4f Max: %.4f\n", + (num == 0.0 ? 0.0 : min), median(), max)); + r.append("------------------------------------------------------\n"); + r.append("left right count % cum % \n"); + double mult = 100.0 / num; + double sum = 0; + for (int b = 0; b < kNumBuckets; b++) { + if (doubles[b] <= 0.0) { + continue; + } + sum += doubles[b]; + r.append(String.format("[ %7.0f, %7.0f ) %7.0f %7.3f%% %7.3f%% ", + ((b == 0) ? 0.0 : K_BUCKET_LIMIT[b - 1]), // left + K_BUCKET_LIMIT[b], // right + doubles[b], // count + mult * doubles[b], // percentage + mult * sum)); // cumulative percentage + + // Add hash marks based on percentage; 20 marks for 100%. + int marks = (int) (20 * (doubles[b] / num) + 0.5); + r.append(Strings.repeat("#", marks)); + r.append("\n"); + } + return r.toString(); + } +} diff --git a/leveldb/pom.xml b/leveldb/pom.xml index e580ec56..7ff9a267 100644 --- a/leveldb/pom.xml +++ b/leveldb/pom.xml @@ -111,8 +111,15 @@ org.apache.maven.plugins maven-surefire-plugin - - + + + listener + org.testng.reporters.VerboseReporter + + + 1 + true + -Xmx2048m diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/Compaction.java b/leveldb/src/main/java/org/iq80/leveldb/impl/Compaction.java index f0699b7a..eeb6089a 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/Compaction.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/Compaction.java @@ -27,9 +27,9 @@ import static org.iq80.leveldb.impl.VersionSet.MAX_GRAND_PARENT_OVERLAP_BYTES; // A Compaction encapsulates information about a compaction. 
-public class Compaction +public class Compaction implements AutoCloseable { - private final Version inputVersion; + private Version inputVersion; private final int level; // Each compaction reads inputs from "level" and "level+1" @@ -70,6 +70,7 @@ public Compaction(Version inputVersion, int level, List levelInput this.grandparents = grandparents; this.maxOutputFileSize = VersionSet.maxFileSizeForLevel(level); this.inputs = new List[] {levelInputs, levelUpInputs}; + inputVersion.retain(); } public int getLevel() @@ -196,8 +197,17 @@ public boolean shouldStopBefore(InternalKey internalKey) } } - public List[] getInputs() + @Override + public void close() { - return inputs; + if (inputVersion != null) { + inputVersion.release(); + inputVersion = null; + } + } + + public List input(int which) + { + return inputs[which]; } } diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/DbImpl.java b/leveldb/src/main/java/org/iq80/leveldb/impl/DbImpl.java old mode 100755 new mode 100644 index d0cafac4..2d7612a1 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/DbImpl.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/DbImpl.java @@ -17,7 +17,7 @@ */ package org.iq80.leveldb.impl; -import com.google.common.base.Throwables; +import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ThreadFactoryBuilder; import org.iq80.leveldb.CompressionType; import org.iq80.leveldb.DB; @@ -35,28 +35,31 @@ import org.iq80.leveldb.impl.WriteBatchImpl.Handler; import org.iq80.leveldb.table.BytewiseComparator; import org.iq80.leveldb.table.CustomUserComparator; +import org.iq80.leveldb.table.FilterPolicy; import org.iq80.leveldb.table.TableBuilder; import org.iq80.leveldb.table.UserComparator; import org.iq80.leveldb.util.DbIterator; import org.iq80.leveldb.util.MergingIterator; +import org.iq80.leveldb.util.SequentialFile; +import org.iq80.leveldb.util.SequentialFileImpl; import org.iq80.leveldb.util.Slice; import org.iq80.leveldb.util.SliceInput; import org.iq80.leveldb.util.SliceOutput; import org.iq80.leveldb.util.Slices; import org.iq80.leveldb.util.Snappy; +import org.iq80.leveldb.util.UnbufferedWritableFile; +import org.iq80.leveldb.util.WritableFile; import java.io.File; -import java.io.FileInputStream; import java.io.FileNotFoundException; -import java.io.FileOutputStream; import java.io.IOException; import java.lang.Thread.UncaughtExceptionHandler; -import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Collections; +import java.util.Deque; +import java.util.LinkedList; import java.util.List; import java.util.Map.Entry; -import java.util.concurrent.Callable; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; @@ -65,13 +68,14 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.Condition; import java.util.concurrent.locks.ReentrantLock; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; import static java.util.Objects.requireNonNull; import static org.iq80.leveldb.impl.DbConstants.L0_SLOWDOWN_WRITES_TRIGGER; import static org.iq80.leveldb.impl.DbConstants.L0_STOP_WRITES_TRIGGER; -import static org.iq80.leveldb.impl.DbConstants.NUM_LEVELS; import static org.iq80.leveldb.impl.SequenceNumber.MAX_SEQUENCE_NUMBER; import static org.iq80.leveldb.impl.ValueType.DELETION; import static 
org.iq80.leveldb.impl.ValueType.VALUE; @@ -96,11 +100,15 @@ public class DbImpl private final Condition backgroundCondition = mutex.newCondition(); private final List pendingOutputs = new ArrayList<>(); // todo + private final Deque writers = new LinkedList<>(); + private final SnapshotList snapshots = new SnapshotList(mutex); + private final WriteBatchImpl tmpBatch = new WriteBatchImpl(); + private final Env env; private LogWriter log; private MemTable memTable; - private MemTable immutableMemTable; + private volatile MemTable immutableMemTable; private final InternalKeyComparator internalKeyComparator; @@ -110,9 +118,12 @@ public class DbImpl private ManualCompaction manualCompaction; - public DbImpl(Options options, File databaseDir) + private CompactionStats[] stats = new CompactionStats[DbConstants.NUM_LEVELS]; + + public DbImpl(Options options, File databaseDir, Env env) throws IOException { + this.env = env; requireNonNull(options, "options is null"); requireNonNull(databaseDir, "databaseDir is null"); this.options = options; @@ -124,6 +135,11 @@ public DbImpl(Options options, File databaseDir) this.databaseDir = databaseDir; + if (this.options.filterPolicy() != null) { + checkArgument(this.options.filterPolicy() instanceof FilterPolicy, "Filter policy must implement Java interface FilterPolicy"); + this.options.filterPolicy(InternalFilterPolicy.convert(this.options.filterPolicy())); + } + //use custom comparator if set DBComparator comparator = options.comparator(); UserComparator userComparator; @@ -154,7 +170,7 @@ public void uncaughtException(Thread t, Throwable e) // Reserve ten files or so for other uses and give the rest to TableCache. int tableCacheSize = options.maxOpenFiles() - 10; - tableCache = new TableCache(databaseDir, tableCacheSize, new InternalUserComparator(internalKeyComparator), options.verifyChecksums()); + tableCache = new TableCache(databaseDir, tableCacheSize, new InternalUserComparator(internalKeyComparator), options); // create the version set @@ -163,6 +179,10 @@ public void uncaughtException(Thread t, Throwable e) checkArgument(databaseDir.exists(), "Database directory '%s' does not exist and could not be created", databaseDir); checkArgument(databaseDir.isDirectory(), "Database directory '%s' is not a directory", databaseDir); + for (int i = 0; i < DbConstants.NUM_LEVELS; i++) { + stats[i] = new CompactionStats(); + } + mutex.lock(); try { // lock the database dir @@ -177,7 +197,7 @@ public void uncaughtException(Thread t, Throwable e) checkArgument(!options.errorIfExists(), "Database '%s' exists and the error if exists option is enabled", databaseDir); } - versions = new VersionSet(databaseDir, tableCache, internalKeyComparator); + versions = new VersionSet(databaseDir, tableCache, internalKeyComparator, options.allowMmapWrites()); // load (and recover) current version versions.recover(); @@ -216,11 +236,11 @@ public void uncaughtException(Thread t, Throwable e) // open transaction log long logFileNumber = versions.getNextFileNumber(); - this.log = Logs.createLogWriter(new File(databaseDir, Filename.logFileName(logFileNumber)), logFileNumber); + this.log = Logs.createLogWriter(new File(databaseDir, Filename.logFileName(logFileNumber)), logFileNumber, options.allowMmapWrites()); edit.setLogNumber(log.getFileNumber()); // apply recovered edits - versions.logAndApply(edit); + versions.logAndApply(edit, mutex); // cleanup unused files deleteObsoleteFiles(); @@ -275,6 +295,47 @@ public void close() public String getProperty(String name) { 
checkBackgroundException(); + if (!name.startsWith("leveldb.")) { + return null; + } + String key = name.substring("leveldb.".length()); + mutex.lock(); + try { + Matcher matcher; + matcher = Pattern.compile("num-files-at-level(\\d+)") + .matcher(key); + if (matcher.matches()) { + final int level = Integer.valueOf(matcher.group(1)); + return String.valueOf(versions.numberOfFilesInLevel(level)); + } + matcher = Pattern.compile("stats") + .matcher(key); + if (matcher.matches()) { + final StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(" Compactions\n"); + stringBuilder.append("Level Files Size(MB) Time(sec) Read(MB) Write(MB)\n"); + stringBuilder.append("--------------------------------------------------\n"); + for (int level = 0; level < DbConstants.NUM_LEVELS; level++) { + int files = versions.numberOfFilesInLevel(level); + if (stats[level].micros > 0 || files > 0) { + stringBuilder.append(String.format( + "%3d %8d %8.0f %9.0f %8.0f %9.0f\n", + level, + files, + versions.numberOfBytesInLevel(level) / 1048576.0, + stats[level].micros / 1e6, + stats[level].bytesRead / 1048576.0, + stats[level].bytesWritten / 1048576.0)); + } + } + return stringBuilder.toString(); + } + //TODO implement sstables + //TODO implement approximate-memory-usage + } + finally { + mutex.unlock(); + } return null; } @@ -338,13 +399,13 @@ public void flushMemTable() mutex.lock(); try { // force compaction - makeRoomForWrite(true); + writeInternal(null, new WriteOptions()); // todo bg_error code while (immutableMemTable != null) { backgroundCondition.awaitUninterruptibly(); } - + checkBackgroundException(); } finally { mutex.unlock(); @@ -354,7 +415,7 @@ public void flushMemTable() public void compactRange(int level, Slice start, Slice end) { checkArgument(level >= 0, "level is negative"); - checkArgument(level + 1 < NUM_LEVELS, "level is greater than or equal to %s", NUM_LEVELS); + checkArgument(level + 1 < DbConstants.NUM_LEVELS, "level is greater than or equal to %s", DbConstants.NUM_LEVELS); requireNonNull(start, "start is null"); requireNonNull(end, "end is null"); @@ -363,7 +424,9 @@ public void compactRange(int level, Slice start, Slice end) while (this.manualCompaction != null) { backgroundCondition.awaitUninterruptibly(); } - ManualCompaction manualCompaction = new ManualCompaction(level, start, end); + ManualCompaction manualCompaction = new ManualCompaction(level, + new InternalKey(start, SequenceNumber.MAX_SEQUENCE_NUMBER, VALUE), + new InternalKey(end, 0, DELETION)); this.manualCompaction = manualCompaction; maybeScheduleCompaction(); @@ -388,29 +451,16 @@ private void maybeScheduleCompaction() else if (shuttingDown.get()) { // DB is being shutdown; no more background compactions } + else if (backgroundException != null) { + // Already got an error; no more changes + } else if (immutableMemTable == null && manualCompaction == null && !versions.needsCompaction()) { // No work to be done } else { - backgroundCompaction = compactionExecutor.submit(new Callable() - { - @Override - public Void call() - throws Exception - { - try { - backgroundCall(); - } - catch (DatabaseShutdownException ignored) { - } - catch (Throwable e) { - backgroundException = e; - } - return null; - } - }); + backgroundCompaction = compactionExecutor.submit(this::backgroundCall); } } @@ -423,36 +473,37 @@ public void checkBackgroundException() } private void backgroundCall() - throws IOException { mutex.lock(); try { - if (backgroundCompaction == null) { - return; - } + checkState(backgroundCompaction != null, 
"Compaction was not correctly scheduled"); try { - if (!shuttingDown.get()) { + if (!shuttingDown.get() && backgroundException == null) { backgroundCompaction(); } } finally { backgroundCompaction = null; } + // Previous compaction may have produced too many files in a level, + // so reschedule another compaction if needed. + maybeScheduleCompaction(); + } + catch (DatabaseShutdownException ignored) { + } + catch (Throwable throwable) { + backgroundException = throwable; + if (throwable instanceof Error) { + throw (Error) throwable; + } } finally { try { - // Previous compaction may have produced too many files in a level, - // so reschedule another compaction if needed. - maybeScheduleCompaction(); + backgroundCondition.signalAll(); } finally { - try { - backgroundCondition.signalAll(); - } - finally { - mutex.unlock(); - } + mutex.unlock(); } } } @@ -462,13 +513,20 @@ private void backgroundCompaction() { checkState(mutex.isHeldByCurrentThread()); - compactMemTableInternal(); + if (immutableMemTable != null) { + compactMemTable(); + } Compaction compaction; - if (manualCompaction != null) { - compaction = versions.compactRange(manualCompaction.level, - new InternalKey(manualCompaction.begin, MAX_SEQUENCE_NUMBER, VALUE), - new InternalKey(manualCompaction.end, 0, DELETION)); + InternalKey manualEnd = null; + boolean isManual = manualCompaction != null; + if (isManual) { + ManualCompaction m = this.manualCompaction; + compaction = versions.compactRange(m.level, m.begin, m.end); + m.done = compaction == null; + if (compaction != null) { + manualEnd = compaction.input(0, compaction.getLevelInputs().size() - 1).getLargest(); + } } else { compaction = versions.pickCompaction(); @@ -477,23 +535,35 @@ private void backgroundCompaction() if (compaction == null) { // no compaction } - else if (manualCompaction == null && compaction.isTrivialMove()) { + else if (!isManual && compaction.isTrivialMove()) { // Move file to next level checkState(compaction.getLevelInputs().size() == 1); FileMetaData fileMetaData = compaction.getLevelInputs().get(0); compaction.getEdit().deleteFile(compaction.getLevel(), fileMetaData.getNumber()); compaction.getEdit().addFile(compaction.getLevel() + 1, fileMetaData); - versions.logAndApply(compaction.getEdit()); + versions.logAndApply(compaction.getEdit(), mutex); // log } else { CompactionState compactionState = new CompactionState(compaction); doCompactionWork(compactionState); + compaction.close(); //release resources cleanupCompaction(compactionState); + deleteObsoleteFiles(); + } + if (compaction != null) { + compaction.close(); } // manual compaction complete - if (manualCompaction != null) { + if (isManual) { + ManualCompaction m = manualCompaction; + if (backgroundException != null) { + m.done = true; + } + if (!m.done) { + m.begin = manualEnd; + } manualCompaction = null; } } @@ -519,10 +589,9 @@ private long recoverLogFile(long fileNumber, VersionEdit edit) { checkState(mutex.isHeldByCurrentThread()); File file = new File(databaseDir, Filename.logFileName(fileNumber)); - try (FileInputStream fis = new FileInputStream(file); - FileChannel channel = fis.getChannel()) { + try (SequentialFile in = SequentialFileImpl.open(file);) { LogMonitor logMonitor = LogMonitors.logMonitor(); - LogReader logReader = new LogReader(channel, logMonitor, true, 0); + LogReader logReader = new LogReader(in, logMonitor, true, 0); // Log(options_.info_log, "Recovering log #%llu", (unsigned long long) log_number); @@ -583,44 +652,41 @@ public byte[] get(byte[] key, ReadOptions options) 
{ checkBackgroundException(); LookupKey lookupKey; + LookupResult lookupResult; mutex.lock(); try { - SnapshotImpl snapshot = getSnapshot(options); - lookupKey = new LookupKey(Slices.wrappedBuffer(key), snapshot.getLastSequence()); + long lastSequence = options.snapshot() != null ? + snapshots.getSequenceFrom(options.snapshot()) : versions.getLastSequence(); + lookupKey = new LookupKey(Slices.wrappedBuffer(key), lastSequence); // First look in the memtable, then in the immutable memtable (if any). - LookupResult lookupResult = memTable.get(lookupKey); - if (lookupResult != null) { - Slice value = lookupResult.getValue(); - if (value == null) { - return null; - } - return value.getBytes(); - } - if (immutableMemTable != null) { - lookupResult = immutableMemTable.get(lookupKey); - if (lookupResult != null) { - Slice value = lookupResult.getValue(); - if (value == null) { - return null; - } - return value.getBytes(); - } - } - } - finally { + final MemTable memTable = this.memTable; + final MemTable immutableMemTable = this.immutableMemTable; + final Version current = versions.getCurrent(); + current.retain(); + ReadStats readStats = null; mutex.unlock(); - } + try { + lookupResult = memTable.get(lookupKey); + if (lookupResult == null && immutableMemTable != null) { + lookupResult = immutableMemTable.get(lookupKey); + } - // Not in memTables; try live files in level order - LookupResult lookupResult = versions.get(lookupKey); + if (lookupResult == null) { + // Not in memTables; try live files in level order + readStats = new ReadStats(); + lookupResult = current.get(lookupKey, readStats); + } - // schedule compaction if necessary - mutex.lock(); - try { - if (versions.needsCompaction()) { + // schedule compaction if necessary + } + finally { + mutex.lock(); + } + if (readStats != null && current.updateStats(readStats)) { maybeScheduleCompaction(); } + current.release(); } finally { mutex.unlock(); @@ -646,21 +712,27 @@ public void put(byte[] key, byte[] value) public Snapshot put(byte[] key, byte[] value, WriteOptions options) throws DBException { - return writeInternal(new WriteBatchImpl().put(key, value), options); + try (WriteBatchImpl writeBatch = new WriteBatchImpl()) { + return writeInternal(writeBatch.put(key, value), options); + } } @Override public void delete(byte[] key) throws DBException { - writeInternal(new WriteBatchImpl().delete(key), new WriteOptions()); + try (WriteBatchImpl writeBatch = new WriteBatchImpl()) { + writeInternal(writeBatch.delete(key), new WriteOptions()); + } } @Override public Snapshot delete(byte[] key, WriteOptions options) throws DBException { - return writeInternal(new WriteBatchImpl().delete(key), options); + try (WriteBatchImpl writeBatch = new WriteBatchImpl()) { + return writeInternal(writeBatch.delete(key), options); + } } @Override @@ -677,41 +749,84 @@ public Snapshot write(WriteBatch updates, WriteOptions options) return writeInternal((WriteBatchImpl) updates, options); } - public Snapshot writeInternal(WriteBatchImpl updates, WriteOptions options) + public Snapshot writeInternal(WriteBatchImpl myBatch, WriteOptions options) throws DBException { checkBackgroundException(); + final WriteBatchInternal w = new WriteBatchInternal(myBatch, options.sync(), mutex.newCondition()); mutex.lock(); try { + writers.offerLast(w); + while (!w.done && writers.peekFirst() != w) { + w.await(); + } + if (w.done) { + return null; + } long sequenceEnd; - if (updates.size() != 0) { - makeRoomForWrite(false); + WriteBatchImpl updates = null; + ValueHolder lastWriter = 
new ValueHolder<>(w); + // May temporarily unlock and wait. + makeRoomForWrite(myBatch == null); + if (myBatch != null) { + updates = buildBatchGroup(lastWriter); // Get sequence numbers for this change set long sequenceBegin = versions.getLastSequence() + 1; sequenceEnd = sequenceBegin + updates.size() - 1; + // Add to log and apply to memtable. We can release the lock + // during this phase since "w" is currently responsible for logging + // and protects against concurrent loggers and concurrent writes + // into mem_. + // log and memtable are modified by makeRoomForWrite + { + mutex.unlock(); + try { + // Log write + Slice record = writeWriteBatch(updates, sequenceBegin); + try { + log.addRecord(record, options.sync()); + } + catch (IOException e) { + throw new DBException(e); + } + + // Update memtable + // this.memTable is modified by makeRoomForWrite + updates.forEach(new InsertIntoHandler(this.memTable, sequenceBegin)); + } + finally { + mutex.lock(); + } + } + if (updates == tmpBatch) { + tmpBatch.clear(); + } // Reserve this sequence in the version set versions.setLastSequence(sequenceEnd); + } - // Log write - Slice record = writeWriteBatch(updates, sequenceBegin); - try { - log.addRecord(record, options.sync()); + final WriteBatchInternal lastWriteV = lastWriter.getValue(); + while (true) { + WriteBatchInternal ready = writers.peekFirst(); + writers.pollFirst(); + if (ready != w) { + ready.done = true; + ready.signal(); } - catch (IOException e) { - throw Throwables.propagate(e); + if (ready == lastWriteV) { + break; } - - // Update memtable - updates.forEach(new InsertIntoHandler(memTable, sequenceBegin)); } - else { - sequenceEnd = versions.getLastSequence(); + + // Notify new head of write queue + if (!writers.isEmpty()) { + writers.peekFirst().signal(); } if (options.snapshot()) { - return new SnapshotImpl(versions.getCurrent(), sequenceEnd); + return snapshots.newSnapshot(versions.getLastSequence()); } else { return null; @@ -722,6 +837,60 @@ public Snapshot writeInternal(WriteBatchImpl updates, WriteOptions options) } } + /** + * REQUIRES: Writer list must be non-empty + * REQUIRES: First writer must have a non-NULL batch + */ + private WriteBatchImpl buildBatchGroup(ValueHolder<WriteBatchInternal> lastWriter) + { + checkArgument(!writers.isEmpty(), "At least one writer is required"); + final WriteBatchInternal first = writers.peekFirst(); + WriteBatchImpl result = first.batch; + checkArgument(result != null, "Batch must be non-null"); + + int sizeInit = first.batch.getApproximateSize(); + /* + * Allow the group to grow up to a maximum size, but if the + * original write is small, limit the growth so we do not slow + * down the small write too much. + */ + int maxSize = 1 << 20; + if (sizeInit <= (128 << 10)) { + maxSize = sizeInit + (128 << 10); + } + + int size = 0; + lastWriter.setValue(first); + for (WriteBatchInternal w : writers) { + if (w.sync && !lastWriter.getValue().sync) { + // Do not include a sync write into a batch handled by a non-sync write. 
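+ // (The grouped batch is flushed with its leader's sync setting, so a sync write must never ride behind a non-sync leader or its durability guarantee would be silently lost; the reverse direction merely costs an extra flush.)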
+ break; + } + + if (w.batch != null) { + size += w.batch.getApproximateSize(); + if (size > maxSize) { + // Do not make batch too big + break; + } + + // Append to result + if (result == first.batch) { + // Switch to temporary batch instead of disturbing caller's batch + result = tmpBatch; + checkState(result.size() == 0, "Temp batch should be clean"); + result.append(first.batch); + } + else if (first.batch != w.batch) { + result.append(w.batch); + } + } + lastWriter.setValue(w); + } + return result; + } + @Override public WriteBatch createWriteBatch() { @@ -744,7 +913,7 @@ public SeekingIteratorAdapter iterator(ReadOptions options) DbIterator rawIterator = internalIterator(); // filter any entries not visible in our snapshot - SnapshotImpl snapshot = getSnapshot(options); + long snapshot = getSnapshot(options); SnapshotSeekingIterator snapshotIterator = new SnapshotSeekingIterator(rawIterator, snapshot, internalKeyComparator.getUserComparator()); return new SeekingIteratorAdapter(snapshotIterator); } @@ -753,18 +922,6 @@ public SeekingIteratorAdapter iterator(ReadOptions options) } } - SeekingIterable internalIterable() - { - return new SeekingIterable() - { - @Override - public DbIterator iterator() - { - return internalIterator(); - } - }; - } - DbIterator internalIterator() { mutex.lock(); @@ -775,7 +932,16 @@ DbIterator internalIterator() iterator = immutableMemTable.iterator(); } Version current = versions.getCurrent(); - return new DbIterator(memTable.iterator(), iterator, current.getLevel0Files(), current.getLevelIterators(), internalKeyComparator); + current.retain(); + return new DbIterator(memTable.iterator(), iterator, current.getLevelIterators(), internalKeyComparator, () -> { + mutex.lock(); + try { + current.release(); + } + finally { + mutex.unlock(); + } + }); } finally { mutex.unlock(); @@ -788,22 +954,21 @@ public Snapshot getSnapshot() checkBackgroundException(); mutex.lock(); try { - return new SnapshotImpl(versions.getCurrent(), versions.getLastSequence()); + return snapshots.newSnapshot(versions.getLastSequence()); } finally { mutex.unlock(); } } - private SnapshotImpl getSnapshot(ReadOptions options) + private long getSnapshot(ReadOptions options) { - SnapshotImpl snapshot; + long snapshot; if (options.snapshot() != null) { - snapshot = (SnapshotImpl) options.snapshot(); + snapshot = snapshots.getSequenceFrom(options.snapshot()); } else { - snapshot = new SnapshotImpl(versions.getCurrent(), versions.getLastSequence()); - snapshot.close(); // To avoid holding the snapshot active.. + snapshot = versions.getLastSequence(); } return snapshot; } @@ -811,17 +976,15 @@ private SnapshotImpl getSnapshot(ReadOptions options) private void makeRoomForWrite(boolean force) { checkState(mutex.isHeldByCurrentThread()); + checkState(!writers.isEmpty()); boolean allowDelay = !force; while (true) { - // todo background processing system need work -// if (!bg_error_.ok()) { -// // Yield previous error -// s = bg_error_; -// break; -// } else - if (allowDelay && versions.numberOfFilesInLevel(0) > L0_SLOWDOWN_WRITES_TRIGGER) { + if (backgroundException != null) { + throw new DBException("Background exception occurred", backgroundException); + } + else if (allowDelay && versions.numberOfFilesInLevel(0) > L0_SLOWDOWN_WRITES_TRIGGER) { // We are getting close to hitting a hard limit on the number of // L0 files. 
Rather than delaying a single write by several // seconds when we hit the hard limit, start delaying each @@ -866,13 +1029,13 @@ else if (versions.numberOfFilesInLevel(0) >= L0_STOP_WRITES_TRIGGER) { log.close(); } catch (IOException e) { - throw new RuntimeException("Unable to close log file " + log.getFile(), e); + throw new RuntimeException("Unable to close log file " + log, e); } // open a new log long logNumber = versions.getNextFileNumber(); try { - this.log = Logs.createLogWriter(new File(databaseDir, Filename.logFileName(logNumber)), logNumber); + this.log = Logs.createLogWriter(new File(databaseDir, Filename.logFileName(logNumber)), logNumber, options.allowMmapWrites()); } catch (IOException e) { throw new RuntimeException("Unable to open new log file " + @@ -891,31 +1054,19 @@ else if (versions.numberOfFilesInLevel(0) >= L0_STOP_WRITES_TRIGGER) { } } - public void compactMemTable() - throws IOException - { - mutex.lock(); - try { - compactMemTableInternal(); - } - finally { - mutex.unlock(); - } - } - - private void compactMemTableInternal() + private void compactMemTable() throws IOException { checkState(mutex.isHeldByCurrentThread()); - if (immutableMemTable == null) { - return; - } + checkState(immutableMemTable != null); try { // Save the contents of the memtable as a new Table VersionEdit edit = new VersionEdit(); Version base = versions.getCurrent(); + base.retain(); writeLevel0Table(immutableMemTable, edit, base); + base.release(); if (shuttingDown.get()) { throw new DatabaseShutdownException("Database shutdown during memtable compaction"); @@ -924,10 +1075,9 @@ private void compactMemTableInternal() // Replace immutable memtable with the generated Table edit.setPreviousLogNumber(0); edit.setLogNumber(log.getFileNumber()); // Earlier logs no longer needed - versions.logAndApply(edit); + versions.logAndApply(edit, mutex); immutableMemTable = null; - deleteObsoleteFiles(); } finally { @@ -938,6 +1088,7 @@ private void compactMemTableInternal() private void writeLevel0Table(MemTable mem, VersionEdit edit, Version base) throws IOException { + final long startMicros = env.nowMicros(); checkState(mutex.isHeldByCurrentThread()); // skip empty mem table @@ -969,6 +1120,7 @@ private void writeLevel0Table(MemTable mem, VersionEdit edit, Version base) } edit.addFile(level, meta); } + this.stats[level].Add(env.nowMicros() - startMicros, 0, meta.getFileSize()); } private FileMetaData buildTable(SeekingIterable data, long fileNumber) @@ -978,9 +1130,8 @@ private FileMetaData buildTable(SeekingIterable data, long f try { InternalKey smallest = null; InternalKey largest = null; - FileChannel channel = new FileOutputStream(file).getChannel(); - try { - TableBuilder tableBuilder = new TableBuilder(options, channel, new InternalUserComparator(internalKeyComparator)); + try (WritableFile writableFile = UnbufferedWritableFile.open(file)) { + TableBuilder tableBuilder = new TableBuilder(options, writableFile, new InternalUserComparator(internalKeyComparator)); for (Entry entry : data) { // update keys @@ -994,14 +1145,7 @@ private FileMetaData buildTable(SeekingIterable data, long f } tableBuilder.finish(); - } - finally { - try { - channel.force(true); - } - finally { - channel.close(); - } + writableFile.force(); } if (smallest == null) { @@ -1012,8 +1156,6 @@ private FileMetaData buildTable(SeekingIterable data, long f // verify table can be opened tableCache.newIterator(fileMetaData); - pendingOutputs.remove(fileNumber); - return fileMetaData; } @@ -1026,13 +1168,14 @@ private FileMetaData 
buildTable(SeekingIterable data, long f private void doCompactionWork(CompactionState compactionState) throws IOException { + final long startMicros = env.nowMicros(); + long immMicros = 0; // Micros spent doing imm_ compactions checkState(mutex.isHeldByCurrentThread()); checkArgument(versions.numberOfBytesInLevel(compactionState.getCompaction().getLevel()) > 0); checkArgument(compactionState.builder == null); checkArgument(compactionState.outfile == null); - // todo track snapshots - compactionState.smallestSnapshot = versions.getLastSequence(); + compactionState.smallestSnapshot = snapshots.isEmpty() ? versions.getLastSequence() : snapshots.getOldest(); // Release mutex while we're actually doing the compaction work mutex.unlock(); @@ -1045,14 +1188,17 @@ private void doCompactionWork(CompactionState compactionState) long lastSequenceForKey = MAX_SEQUENCE_NUMBER; while (iterator.hasNext() && !shuttingDown.get()) { // always give priority to compacting the current mem table - mutex.lock(); - try { - compactMemTableInternal(); - } - finally { - mutex.unlock(); + if (immutableMemTable != null) { + long immStart = env.nowMicros(); + mutex.lock(); + try { + compactMemTable(); + } + finally { + mutex.unlock(); + } + immMicros += (env.nowMicros() - immStart); } - InternalKey key = iterator.peek().getKey(); if (compactionState.compaction.shouldStopBefore(key) && compactionState.builder != null) { finishCompactionOutputFile(compactionState); @@ -1123,11 +1269,20 @@ else if (key.getValueType() == DELETION && } } finally { + long micros = env.nowMicros() - startMicros - immMicros; + long bytesRead = 0; + for (int which = 0; which < 2; which++) { + for (int i = 0; i < compactionState.compaction.input(which).size(); i++) { + bytesRead += compactionState.compaction.input(which, i).getFileSize(); + } + } + long bytesWritten = 0; + for (int i = 0; i < compactionState.outputs.size(); i++) { + bytesWritten += compactionState.outputs.get(i).getFileSize(); + } mutex.lock(); + this.stats[compactionState.compaction.getLevel() + 1].Add(micros, bytesRead, bytesWritten); } - - // todo port CompactionStats code - installCompactionResults(compactionState); } @@ -1137,22 +1292,22 @@ private void openCompactionOutputFile(CompactionState compactionState) requireNonNull(compactionState, "compactionState is null"); checkArgument(compactionState.builder == null, "compactionState builder is not null"); + long fileNumber; mutex.lock(); try { - long fileNumber = versions.getNextFileNumber(); + fileNumber = versions.getNextFileNumber(); pendingOutputs.add(fileNumber); compactionState.currentFileNumber = fileNumber; compactionState.currentFileSize = 0; compactionState.currentSmallest = null; compactionState.currentLargest = null; - - File file = new File(databaseDir, Filename.tableFileName(fileNumber)); - compactionState.outfile = new FileOutputStream(file).getChannel(); - compactionState.builder = new TableBuilder(options, compactionState.outfile, new InternalUserComparator(internalKeyComparator)); } finally { mutex.unlock(); } + File file = new File(databaseDir, Filename.tableFileName(fileNumber)); + compactionState.outfile = UnbufferedWritableFile.open(file); + compactionState.builder = new TableBuilder(options, compactionState.outfile, new InternalUserComparator(internalKeyComparator)); } private void finishCompactionOutputFile(CompactionState compactionState) @@ -1180,7 +1335,7 @@ private void finishCompactionOutputFile(CompactionState compactionState) compactionState.builder = null; - 
compactionState.outfile.force(true); + compactionState.outfile.force(); compactionState.outfile.close(); compactionState.outfile = null; @@ -1203,25 +1358,21 @@ private void installCompactionResults(CompactionState compact) pendingOutputs.remove(output.getNumber()); } - try { - versions.logAndApply(compact.compaction.getEdit()); - deleteObsoleteFiles(); - } - catch (IOException e) { - // Compaction failed for some reason. Simply discard the work and try again later. - - // Discard any files we may have created during this failed compaction - for (FileMetaData output : compact.outputs) { - File file = new File(databaseDir, Filename.tableFileName(output.getNumber())); - file.delete(); - } - compact.outputs.clear(); - } + versions.logAndApply(compact.compaction.getEdit(), mutex); } + @VisibleForTesting int numberOfFilesInLevel(int level) { - return versions.getCurrent().numberOfFilesInLevel(level); + mutex.lock(); + Version v; + try { + v = versions.getCurrent(); + } + finally { + mutex.unlock(); + } + return v.numberOfFilesInLevel(level); } @Override @@ -1238,19 +1389,39 @@ public long[] getApproximateSizes(Range... ranges) public long getApproximateSizes(Range range) { - Version v = versions.getCurrent(); + mutex.lock(); + Version v; + try { + v = versions.getCurrent(); + v.retain(); + } + finally { + mutex.unlock(); + } InternalKey startKey = new InternalKey(Slices.wrappedBuffer(range.start()), MAX_SEQUENCE_NUMBER, VALUE); InternalKey limitKey = new InternalKey(Slices.wrappedBuffer(range.limit()), MAX_SEQUENCE_NUMBER, VALUE); long startOffset = v.getApproximateOffsetOf(startKey); long limitOffset = v.getApproximateOffsetOf(limitKey); - + mutex.lock(); + try { + v.release(); + } + finally { + mutex.unlock(); + } return (limitOffset >= startOffset ? limitOffset - startOffset : 0); } public long getMaxNextLevelOverlappingBytes() { - return versions.getMaxNextLevelOverlappingBytes(); + mutex.lock(); + try { + return versions.getMaxNextLevelOverlappingBytes(); + } + finally { + mutex.unlock(); + } } private static class CompactionState @@ -1262,7 +1433,7 @@ private static class CompactionState private long smallestSnapshot; // State kept for output being generated - private FileChannel outfile; + private WritableFile outfile; private TableBuilder builder; // Current file being generated @@ -1287,10 +1458,11 @@ public Compaction getCompaction() private static class ManualCompaction { private final int level; - private final Slice begin; - private final Slice end; + private InternalKey begin; + private final InternalKey end; + private boolean done; - private ManualCompaction(int level, Slice begin, Slice end) + private ManualCompaction(int level, InternalKey begin, InternalKey end) { this.level = level; this.begin = begin; @@ -1298,6 +1470,29 @@ private ManualCompaction(int level, Slice begin, Slice end) } } + // Per level compaction stats. stats[level] stores the stats for + // compactions that produced data for the specified "level". 
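+ // (In this port: writeLevel0Table adds flush time and bytes to stats[level], and doCompactionWork adds to stats[compaction.getLevel() + 1].)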
+ private static class CompactionStats + { + long micros; + long bytesRead; + long bytesWritten; + + CompactionStats() + { + this.micros = 0; + this.bytesRead = 0; + this.bytesWritten = 0; + } + + public void Add(long micros, long bytesRead, long bytesWritten) + { + this.micros += micros; + this.bytesRead += bytesRead; + this.bytesWritten += bytesWritten; + } + } + private WriteBatchImpl readWriteBatch(SliceInput record, int updateSize) throws IOException { @@ -1353,31 +1548,6 @@ public void delete(Slice key) return record.slice(0, sliceOutput.size()); } - private static class InsertIntoHandler - implements Handler - { - private long sequence; - private final MemTable memTable; - - public InsertIntoHandler(MemTable memTable, long sequenceBegin) - { - this.memTable = memTable; - this.sequence = sequenceBegin; - } - - @Override - public void put(Slice key, Slice value) - { - memTable.add(sequence++, VALUE, key, value); - } - - @Override - public void delete(Slice key) - { - memTable.add(sequence++, DELETION, key, Slices.EMPTY_SLICE); - } - } - public static class DatabaseShutdownException extends DBException { @@ -1445,6 +1615,102 @@ public void resumeCompactions() public void compactRange(byte[] begin, byte[] end) throws DBException { - throw new UnsupportedOperationException("Not yet implemented"); + final Slice smallestUserKey = begin == null ? null : new Slice(begin, 0, begin.length); + final Slice largestUserKey = end == null ? null : new Slice(end, 0, end.length); + int maxLevelWithFiles = 1; + mutex.lock(); + try { + Version base = versions.getCurrent(); + for (int level = 1; level < DbConstants.NUM_LEVELS; level++) { + if (base.overlapInLevel(level, smallestUserKey, largestUserKey)) { + maxLevelWithFiles = level; + } + } + } + finally { + mutex.unlock(); + } + testCompactMemTable(); // TODO: Skip if memtable does not overlap + for (int level = 0; level < maxLevelWithFiles; level++) { + testCompactRange(level, smallestUserKey, largestUserKey); + } + } + + @VisibleForTesting + void testCompactRange(int level, Slice begin, Slice end) throws DBException + { + checkArgument(level >= 0); + checkArgument(level + 1 < DbConstants.NUM_LEVELS); + + final InternalKey beginStorage = begin == null ? null : new InternalKey(begin, SequenceNumber.MAX_SEQUENCE_NUMBER, VALUE); + final InternalKey endStorage = end == null ? null : new InternalKey(end, 0, DELETION); + ManualCompaction manual = new ManualCompaction(level, beginStorage, endStorage); + mutex.lock(); + try { + while (!manual.done && !shuttingDown.get() && backgroundException == null) { + if (manualCompaction == null) { // Idle + manualCompaction = manual; + maybeScheduleCompaction(); + } + else { // Running either my compaction or another compaction. + backgroundCondition.awaitUninterruptibly(); + } + } + if (manualCompaction == manual) { + // Cancel my manual compaction since we aborted early for some reason. 
+ manualCompaction = null; + } + } + finally { + mutex.unlock(); + } + } + + @VisibleForTesting + public void testCompactMemTable() throws DBException + { + // NULL batch means just wait for earlier writes to be done + writeInternal(null, new WriteOptions()); + // Wait until the compaction completes + mutex.lock(); + + try { + while (immutableMemTable != null && backgroundException == null) { + backgroundCondition.awaitUninterruptibly(); + } + if (immutableMemTable != null) { + if (backgroundException != null) { + throw new DBException(backgroundException); + } + } + } + finally { + mutex.unlock(); + } + } + + private class WriteBatchInternal + { + private final WriteBatchImpl batch; + private final boolean sync; + private final Condition backgroundCondition; + private boolean done = false; + + public WriteBatchInternal(WriteBatchImpl batch, boolean sync, Condition backgroundCondition) + { + this.batch = batch; + this.sync = sync; + this.backgroundCondition = backgroundCondition; + } + + public void await() + { + backgroundCondition.awaitUninterruptibly(); + } + + public void signal() + { + backgroundCondition.signal(); + } } } diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/Env.java b/leveldb/src/main/java/org/iq80/leveldb/impl/Env.java new file mode 100644 index 00000000..36989713 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/Env.java @@ -0,0 +1,23 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.impl; + +public interface Env +{ + long nowMicros(); +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/EnvImpl.java b/leveldb/src/main/java/org/iq80/leveldb/impl/EnvImpl.java new file mode 100644 index 00000000..10385917 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/EnvImpl.java @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.iq80.leveldb.impl; + +import java.util.concurrent.TimeUnit; + +public class EnvImpl implements Env +{ + @Override + public long nowMicros() + { + return TimeUnit.NANOSECONDS.toMicros(System.nanoTime()); + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/FileChannelLogWriter.java b/leveldb/src/main/java/org/iq80/leveldb/impl/FileChannelLogWriter.java deleted file mode 100644 index 7547cedf..00000000 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/FileChannelLogWriter.java +++ /dev/null @@ -1,213 +0,0 @@ -/* - * Copyright (C) 2011 the original author or authors. - * See the notice.md file distributed with this work for additional - * information regarding copyright ownership. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.iq80.leveldb.impl; - -import org.iq80.leveldb.util.Closeables; -import org.iq80.leveldb.util.Slice; -import org.iq80.leveldb.util.SliceInput; -import org.iq80.leveldb.util.SliceOutput; -import org.iq80.leveldb.util.Slices; - -import java.io.File; -import java.io.FileNotFoundException; -import java.io.FileOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.util.concurrent.atomic.AtomicBoolean; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static java.util.Objects.requireNonNull; -import static org.iq80.leveldb.impl.LogConstants.BLOCK_SIZE; -import static org.iq80.leveldb.impl.LogConstants.HEADER_SIZE; - -public class FileChannelLogWriter - implements LogWriter -{ - private final File file; - private final long fileNumber; - private final FileChannel fileChannel; - private final AtomicBoolean closed = new AtomicBoolean(); - - /** - * Current offset in the current block - */ - private int blockOffset; - - public FileChannelLogWriter(File file, long fileNumber) - throws FileNotFoundException - { - requireNonNull(file, "file is null"); - checkArgument(fileNumber >= 0, "fileNumber is negative"); - - this.file = file; - this.fileNumber = fileNumber; - this.fileChannel = new FileOutputStream(file).getChannel(); - } - - @Override - public boolean isClosed() - { - return closed.get(); - } - - @Override - public synchronized void close() - { - closed.set(true); - - // try to forces the log to disk - try { - fileChannel.force(true); - } - catch (IOException ignored) { - } - - // close the channel - Closeables.closeQuietly(fileChannel); - } - - @Override - public synchronized void delete() - { - closed.set(true); - - // close the channel - Closeables.closeQuietly(fileChannel); - - // try to delete the file - file.delete(); - } - - @Override - public File getFile() - { - return file; - } - - @Override - public long getFileNumber() - { - return fileNumber; - } - - // Writes a stream of chunks such that no chunk is split across a block boundary - @Override - public synchronized void addRecord(Slice record, boolean force) - throws IOException - { - 
checkState(!closed.get(), "Log has been closed"); - - SliceInput sliceInput = record.input(); - - // used to track first, middle and last blocks - boolean begin = true; - - // Fragment the record int chunks as necessary and write it. Note that if record - // is empty, we still want to iterate once to write a single - // zero-length chunk. - do { - int bytesRemainingInBlock = BLOCK_SIZE - blockOffset; - checkState(bytesRemainingInBlock >= 0); - - // Switch to a new block if necessary - if (bytesRemainingInBlock < HEADER_SIZE) { - if (bytesRemainingInBlock > 0) { - // Fill the rest of the block with zeros - // todo lame... need a better way to write zeros - fileChannel.write(ByteBuffer.allocate(bytesRemainingInBlock)); - } - blockOffset = 0; - bytesRemainingInBlock = BLOCK_SIZE - blockOffset; - } - - // Invariant: we never leave less than HEADER_SIZE bytes available in a block - int bytesAvailableInBlock = bytesRemainingInBlock - HEADER_SIZE; - checkState(bytesAvailableInBlock >= 0); - - // if there are more bytes in the record then there are available in the block, - // fragment the record; otherwise write to the end of the record - boolean end; - int fragmentLength; - if (sliceInput.available() > bytesAvailableInBlock) { - end = false; - fragmentLength = bytesAvailableInBlock; - } - else { - end = true; - fragmentLength = sliceInput.available(); - } - - // determine block type - LogChunkType type; - if (begin && end) { - type = LogChunkType.FULL; - } - else if (begin) { - type = LogChunkType.FIRST; - } - else if (end) { - type = LogChunkType.LAST; - } - else { - type = LogChunkType.MIDDLE; - } - - // write the chunk - writeChunk(type, sliceInput.readSlice(fragmentLength)); - - // we are no longer on the first chunk - begin = false; - } while (sliceInput.isReadable()); - - if (force) { - fileChannel.force(false); - } - } - - private void writeChunk(LogChunkType type, Slice slice) - throws IOException - { - checkArgument(slice.length() <= 0xffff, "length %s is larger than two bytes", slice.length()); - checkArgument(blockOffset + HEADER_SIZE <= BLOCK_SIZE); - - // create header - Slice header = newLogRecordHeader(type, slice, slice.length()); - - // write the header and the payload - header.getBytes(0, fileChannel, header.length()); - slice.getBytes(0, fileChannel, slice.length()); - - blockOffset += HEADER_SIZE + slice.length(); - } - - private Slice newLogRecordHeader(LogChunkType type, Slice slice, int length) - { - int crc = Logs.getChunkChecksum(type.getPersistentId(), slice.getRawArray(), slice.getRawOffset(), length); - - // Format the header - SliceOutput header = Slices.allocate(HEADER_SIZE).output(); - header.writeInt(crc); - header.writeByte((byte) (length & 0xff)); - header.writeByte((byte) (length >>> 8)); - header.writeByte((byte) (type.getPersistentId())); - - return header.slice(); - } -} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/Filename.java b/leveldb/src/main/java/org/iq80/leveldb/impl/Filename.java index b12ec99a..e8f7d819 100755 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/Filename.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/Filename.java @@ -57,6 +57,13 @@ public static String logFileName(long number) * Return the name of the sstable with the specified number. */ public static String tableFileName(long number) + { + return makeFileName(number, "ldb"); + } + /** + * Return the deprecated name of the sstable with the specified number. 
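+ * Tables were created with the ".sst" extension before the switch to ".ldb" above; databases written by older versions may still contain such files, so this name must remain resolvable.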
+ */ + public static String sstTableFileName(long number) { return makeFileName(number, "sst"); } diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/InsertIntoHandler.java b/leveldb/src/main/java/org/iq80/leveldb/impl/InsertIntoHandler.java new file mode 100644 index 00000000..cf777f3e --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/InsertIntoHandler.java @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.impl; + +import org.iq80.leveldb.util.Slice; +import org.iq80.leveldb.util.Slices; + +import static org.iq80.leveldb.impl.ValueType.DELETION; +import static org.iq80.leveldb.impl.ValueType.VALUE; + +final class InsertIntoHandler + implements WriteBatchImpl.Handler +{ + private long sequence; + private final MemTable memTable; + + public InsertIntoHandler(MemTable memTable, long sequenceBegin) + { + this.memTable = memTable; + this.sequence = sequenceBegin; + } + + @Override + public void put(Slice key, Slice value) + { + memTable.add(sequence++, VALUE, key, value); + } + + @Override + public void delete(Slice key) + { + memTable.add(sequence++, DELETION, key, Slices.EMPTY_SLICE); + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/InternalFilterPolicy.java b/leveldb/src/main/java/org/iq80/leveldb/impl/InternalFilterPolicy.java new file mode 100644 index 00000000..5b92b3c3 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/InternalFilterPolicy.java @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.impl; + +import com.google.common.base.Function; +import com.google.common.collect.Lists; +import org.iq80.leveldb.XFilterPolicy; +import org.iq80.leveldb.util.Slice; + +import java.util.List; + +import static com.google.common.base.Preconditions.checkArgument; + +/** + * Filter policy wrapper that converts from internal keys to user keys + *
<p> + * <p>
+ * + * @author Honore Vasconcelos + */ +final class InternalFilterPolicy implements org.iq80.leveldb.table.FilterPolicy +{ + private static final Function EXTRACT_USER_KEY = InternalFilterPolicy::extractUserKey; + private org.iq80.leveldb.table.FilterPolicy userPolicy; + + private InternalFilterPolicy(org.iq80.leveldb.table.FilterPolicy userPolicy) + { + this.userPolicy = userPolicy; + } + + static InternalFilterPolicy convert(XFilterPolicy policy) + { + checkArgument(policy == null || policy instanceof org.iq80.leveldb.table.FilterPolicy, "Filter policy must implement Java interface FilterPolicy"); + if (policy instanceof InternalFilterPolicy) { + return (InternalFilterPolicy) policy; + } + return policy == null ? null : new InternalFilterPolicy((org.iq80.leveldb.table.FilterPolicy) policy); + } + + @Override + public String name() + { + return userPolicy.name(); + } + + @Override + public byte[] createFilter(final List keys) + { + //instead of copying all the keys to a shorter form, make it lazy + return userPolicy.createFilter(Lists.transform(keys, EXTRACT_USER_KEY)); + } + + @Override + public boolean keyMayMatch(Slice key, Slice filter) + { + return userPolicy.keyMayMatch(extractUserKey(key), filter); + } + + private static Slice extractUserKey(Slice key) + { + checkArgument(key.length() >= 8); + return key.slice(0, key.length() - 8); + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/Iq80DBFactory.java b/leveldb/src/main/java/org/iq80/leveldb/impl/Iq80DBFactory.java index 4a8244ac..400eddc0 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/Iq80DBFactory.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/Iq80DBFactory.java @@ -80,7 +80,7 @@ public class Iq80DBFactory public DB open(File path, Options options) throws IOException { - return new DbImpl(options, path); + return new DbImpl(options, path, new EnvImpl()); } @Override diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/KeyMatchingLookup.java b/leveldb/src/main/java/org/iq80/leveldb/impl/KeyMatchingLookup.java new file mode 100644 index 00000000..8de14380 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/KeyMatchingLookup.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.iq80.leveldb.impl; + +import org.iq80.leveldb.table.KeyValueFunction; +import org.iq80.leveldb.util.Slice; + +import static com.google.common.base.Preconditions.checkState; +import static org.iq80.leveldb.impl.ValueType.VALUE; + +/** + * @author Honore Vasconcelos + */ +public class KeyMatchingLookup implements KeyValueFunction +{ + private LookupKey key; + + KeyMatchingLookup(LookupKey key) + { + this.key = key; + } + + @Override + public LookupResult apply(Slice internalKey1, Slice value) + { + // parse the key in the block + checkState(internalKey1 != null, "Corrupt key for %s", key); + + final InternalKey internalKey = new InternalKey(internalKey1); + + // if this is a value key (not a delete) and the keys match, return the value + if (key.getUserKey().equals(internalKey.getUserKey())) { + if (internalKey.getValueType() == ValueType.DELETION) { + return LookupResult.deleted(key); + } + else if (internalKey.getValueType() == VALUE) { + return LookupResult.ok(key, value); + } + } + return null; + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/Level.java b/leveldb/src/main/java/org/iq80/leveldb/impl/Level.java index 3b2b1147..d11c318f 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/Level.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/Level.java @@ -17,21 +17,19 @@ */ package org.iq80.leveldb.impl; -import com.google.common.collect.Lists; +import com.google.common.annotations.VisibleForTesting; import org.iq80.leveldb.table.UserComparator; -import org.iq80.leveldb.util.InternalTableIterator; +import org.iq80.leveldb.util.InternalIterator; +import org.iq80.leveldb.util.Level0Iterator; import org.iq80.leveldb.util.LevelIterator; import org.iq80.leveldb.util.Slice; import java.util.ArrayList; -import java.util.Collections; +import java.util.Collection; import java.util.Comparator; import java.util.List; -import java.util.Map.Entry; import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static java.nio.charset.StandardCharsets.UTF_8; import static java.util.Objects.requireNonNull; import static org.iq80.leveldb.impl.SequenceNumber.MAX_SEQUENCE_NUMBER; import static org.iq80.leveldb.impl.ValueType.VALUE; @@ -40,12 +38,13 @@ public class Level implements SeekingIterable { + private static final Comparator NEWEST_FIRST = (fileMetaData, fileMetaData1) -> (int) (fileMetaData1.getNumber() - fileMetaData.getNumber()); private final int levelNumber; private final TableCache tableCache; private final InternalKeyComparator internalKeyComparator; private final List files; - public Level(int levelNumber, List files, TableCache tableCache, InternalKeyComparator internalKeyComparator) + public Level(int levelNumber, Collection files, TableCache tableCache, InternalKeyComparator internalKeyComparator) { checkArgument(levelNumber >= 0, "levelNumber is negative"); requireNonNull(files, "files is null"); @@ -55,7 +54,6 @@ public Level(int levelNumber, List files, TableCache tableCache, I this.files = new ArrayList<>(files); this.tableCache = tableCache; this.internalKeyComparator = internalKeyComparator; - checkArgument(levelNumber >= 0, "levelNumber is negative"); this.levelNumber = levelNumber; } @@ -70,9 +68,14 @@ public List getFiles() } @Override - public LevelIterator iterator() + public InternalIterator iterator() { - return createLevelConcatIterator(tableCache, files, internalKeyComparator); + if (levelNumber == 0) { + return new Level0Iterator(tableCache, files, 
internalKeyComparator); + } + else { + return createLevelConcatIterator(tableCache, files, internalKeyComparator); + } } public static LevelIterator createLevelConcatIterator(TableCache tableCache, List files, InternalKeyComparator internalKeyComparator) @@ -94,10 +97,14 @@ public LookupResult get(LookupKey key, ReadStats readStats) fileMetaDataList.add(fileMetaData); } } + if (fileMetaDataList.isEmpty()) { + return null; + } + fileMetaDataList.sort(NEWEST_FIRST); } else { // Binary search to find earliest index whose largest key >= ikey. - int index = ceilingEntryIndex(Lists.transform(files, FileMetaData::getLargest), key.getInternalKey(), internalKeyComparator); + int index = findFile(key.getInternalKey()); // did we find any files that could contain the key? if (index >= files.size()) { @@ -127,61 +134,65 @@ public LookupResult get(LookupKey key, ReadStats readStats) lastFileRead = fileMetaData; lastFileReadLevel = levelNumber; - // open the iterator - InternalTableIterator iterator = tableCache.newIterator(fileMetaData); - - // seek to the key - iterator.seek(key.getInternalKey()); - - if (iterator.hasNext()) { - // parse the key in the block - Entry entry = iterator.next(); - InternalKey internalKey = entry.getKey(); - checkState(internalKey != null, "Corrupt key for %s", key.getUserKey().toString(UTF_8)); - - // if this is a value key (not a delete) and the keys match, return the value - if (key.getUserKey().equals(internalKey.getUserKey())) { - if (internalKey.getValueType() == ValueType.DELETION) { - return LookupResult.deleted(key); - } - else if (internalKey.getValueType() == VALUE) { - return LookupResult.ok(key, entry.getValue()); - } - } + final LookupResult lookupResult = tableCache.get(key.getInternalKey().encode(), fileMetaData, new KeyMatchingLookup(key)); + if (lookupResult != null) { + return lookupResult; } } return null; } - private static int ceilingEntryIndex(List list, T key, Comparator comparator) + public boolean someFileOverlapsRange(boolean disjointSortedFiles, Slice smallestUserKey, Slice largestUserKey) { - int insertionPoint = Collections.binarySearch(list, key, comparator); - if (insertionPoint < 0) { - insertionPoint = -(insertionPoint + 1); + UserComparator userComparator = internalKeyComparator.getUserComparator(); + if (!disjointSortedFiles) { + // Need to check against all files + for (FileMetaData file : files) { + if (afterFile(userComparator, smallestUserKey, file) || + beforeFile(userComparator, largestUserKey, file)) { + // No overlap + } + else { + return true; // Overlap + } + } + return false; + } + int index = 0; + if (smallestUserKey != null) { + InternalKey smallestInternalKey = new InternalKey(smallestUserKey, MAX_SEQUENCE_NUMBER, VALUE); + index = findFile(smallestInternalKey); + } + + if (index >= files.size()) { + // beginning of range is after all files, so no overlap. 
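+ // (findFile returns the index of the first file whose largest key is >= the search key, so an index past the end means every file ends before the start of the range.)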
+ return false; } - return insertionPoint; + + return !beforeFile(userComparator, largestUserKey, files.get(index)); } - public boolean someFileOverlapsRange(Slice smallestUserKey, Slice largestUserKey) + private boolean beforeFile(UserComparator userComparator, Slice userKey, FileMetaData file) { - InternalKey smallestInternalKey = new InternalKey(smallestUserKey, MAX_SEQUENCE_NUMBER, VALUE); - int index = findFile(smallestInternalKey); - - UserComparator userComparator = internalKeyComparator.getUserComparator(); - return ((index < files.size()) && - userComparator.compare(largestUserKey, files.get(index).getSmallest().getUserKey()) >= 0); + // null userKey occurs after all keys and is therefore never before *f + return (userKey != null && + userComparator.compare(userKey, file.getSmallest().getUserKey()) < 0); } - private int findFile(InternalKey targetKey) + private boolean afterFile(UserComparator userComparator, Slice userKey, FileMetaData file) { - if (files.isEmpty()) { - return files.size(); - } + // NULL user_key occurs before all keys and is therefore never after *f + return (userKey != null && + userComparator.compare(userKey, file.getLargest().getUserKey()) > 0); + } + @VisibleForTesting + int findFile(InternalKey targetKey) + { // todo replace with Collections.binarySearch int left = 0; - int right = files.size() - 1; + int right = files.size(); // binary search restart positions to find the restart position immediately before the targetKey while (left < right) { diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/Level0.java b/leveldb/src/main/java/org/iq80/leveldb/impl/Level0.java deleted file mode 100644 index c1b7c9e7..00000000 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/Level0.java +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (C) 2011 the original author or authors. - * See the notice.md file distributed with this work for additional - * information regarding copyright ownership. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.iq80.leveldb.impl; - -import org.iq80.leveldb.table.UserComparator; -import org.iq80.leveldb.util.InternalTableIterator; -import org.iq80.leveldb.util.Level0Iterator; -import org.iq80.leveldb.util.Slice; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; -import java.util.Map.Entry; - -import static com.google.common.base.Preconditions.checkState; -import static java.nio.charset.StandardCharsets.UTF_8; -import static java.util.Objects.requireNonNull; -import static org.iq80.leveldb.impl.SequenceNumber.MAX_SEQUENCE_NUMBER; -import static org.iq80.leveldb.impl.ValueType.VALUE; - -// todo this class should be immutable -public class Level0 - implements SeekingIterable -{ - private final TableCache tableCache; - private final InternalKeyComparator internalKeyComparator; - private final List files; - - public static final Comparator NEWEST_FIRST = new Comparator() - { - @Override - public int compare(FileMetaData fileMetaData, FileMetaData fileMetaData1) - { - return (int) (fileMetaData1.getNumber() - fileMetaData.getNumber()); - } - }; - - public Level0(List files, TableCache tableCache, InternalKeyComparator internalKeyComparator) - { - requireNonNull(files, "files is null"); - requireNonNull(tableCache, "tableCache is null"); - requireNonNull(internalKeyComparator, "internalKeyComparator is null"); - - this.files = new ArrayList<>(files); - this.tableCache = tableCache; - this.internalKeyComparator = internalKeyComparator; - } - - public int getLevelNumber() - { - return 0; - } - - public List getFiles() - { - return files; - } - - @Override - public Level0Iterator iterator() - { - return new Level0Iterator(tableCache, files, internalKeyComparator); - } - - public LookupResult get(LookupKey key, ReadStats readStats) - { - if (files.isEmpty()) { - return null; - } - - List fileMetaDataList = new ArrayList<>(files.size()); - for (FileMetaData fileMetaData : files) { - if (internalKeyComparator.getUserComparator().compare(key.getUserKey(), fileMetaData.getSmallest().getUserKey()) >= 0 && - internalKeyComparator.getUserComparator().compare(key.getUserKey(), fileMetaData.getLargest().getUserKey()) <= 0) { - fileMetaDataList.add(fileMetaData); - } - } - - Collections.sort(fileMetaDataList, NEWEST_FIRST); - - readStats.clear(); - for (FileMetaData fileMetaData : fileMetaDataList) { - // open the iterator - InternalTableIterator iterator = tableCache.newIterator(fileMetaData); - - // seek to the key - iterator.seek(key.getInternalKey()); - - if (iterator.hasNext()) { - // parse the key in the block - Entry entry = iterator.next(); - InternalKey internalKey = entry.getKey(); - checkState(internalKey != null, "Corrupt key for %s", key.getUserKey().toString(UTF_8)); - - // if this is a value key (not a delete) and the keys match, return the value - if (key.getUserKey().equals(internalKey.getUserKey())) { - if (internalKey.getValueType() == ValueType.DELETION) { - return LookupResult.deleted(key); - } - else if (internalKey.getValueType() == VALUE) { - return LookupResult.ok(key, entry.getValue()); - } - } - } - - if (readStats.getSeekFile() == null) { - // We have had more than one seek for this read. Charge the first file. 
- readStats.setSeekFile(fileMetaData); - readStats.setSeekFileLevel(0); - } - } - - return null; - } - - public boolean someFileOverlapsRange(Slice smallestUserKey, Slice largestUserKey) - { - InternalKey smallestInternalKey = new InternalKey(smallestUserKey, MAX_SEQUENCE_NUMBER, VALUE); - int index = findFile(smallestInternalKey); - - UserComparator userComparator = internalKeyComparator.getUserComparator(); - return ((index < files.size()) && - userComparator.compare(largestUserKey, files.get(index).getSmallest().getUserKey()) >= 0); - } - - private int findFile(InternalKey targetKey) - { - if (files.isEmpty()) { - return files.size(); - } - - // todo replace with Collections.binarySearch - int left = 0; - int right = files.size() - 1; - - // binary search restart positions to find the restart position immediately before the targetKey - while (left < right) { - int mid = (left + right) / 2; - - if (internalKeyComparator.compare(files.get(mid).getLargest(), targetKey) < 0) { - // Key at "mid.largest" is < "target". Therefore all - // files at or before "mid" are uninteresting. - left = mid + 1; - } - else { - // Key at "mid.largest" is >= "target". Therefore all files - // after "mid" are uninteresting. - right = mid; - } - } - return right; - } - - public void addFile(FileMetaData fileMetaData) - { - // todo remove mutation - files.add(fileMetaData); - } - - @Override - public String toString() - { - final StringBuilder sb = new StringBuilder(); - sb.append("Level0"); - sb.append("{files=").append(files); - sb.append('}'); - return sb.toString(); - } -} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/LogReader.java b/leveldb/src/main/java/org/iq80/leveldb/impl/LogReader.java index 85c125cb..736931bf 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/LogReader.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/LogReader.java @@ -17,6 +17,7 @@ */ package org.iq80.leveldb.impl; +import org.iq80.leveldb.util.SequentialFile; import org.iq80.leveldb.util.DynamicSliceOutput; import org.iq80.leveldb.util.Slice; import org.iq80.leveldb.util.SliceInput; @@ -24,7 +25,6 @@ import org.iq80.leveldb.util.Slices; import java.io.IOException; -import java.nio.channels.FileChannel; import static org.iq80.leveldb.impl.LogChunkType.BAD_CHUNK; import static org.iq80.leveldb.impl.LogChunkType.EOF; @@ -37,7 +37,7 @@ public class LogReader { - private final FileChannel fileChannel; + private final SequentialFile sequentialFile; private final LogMonitor monitor; @@ -83,9 +83,9 @@ public class LogReader */ private Slice currentChunk = Slices.EMPTY_SLICE; - public LogReader(FileChannel fileChannel, LogMonitor monitor, boolean verifyChecksums, long initialOffset) + public LogReader(SequentialFile sequentialFile, LogMonitor monitor, boolean verifyChecksums, long initialOffset) { - this.fileChannel = fileChannel; + this.sequentialFile = sequentialFile; this.monitor = monitor; this.verifyChecksums = verifyChecksums; this.initialOffset = initialOffset; @@ -118,7 +118,7 @@ private boolean skipToInitialBlock() // Skip to start of first block that can contain the initial record if (blockStartLocation > 0) { try { - fileChannel.position(blockStartLocation); + sequentialFile.skip(blockStartLocation); } catch (IOException e) { reportDrop(blockStartLocation, e); @@ -310,7 +310,7 @@ public boolean readNextBlock() // read the next full block while (blockScratch.writableBytes() > 0) { try { - int bytesRead = blockScratch.writeBytes(fileChannel, blockScratch.writableBytes()); + int bytesRead = 
sequentialFile.read(blockScratch.writableBytes(), blockScratch); if (bytesRead < 0) { // no more bytes to read eof = true; diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/LogWriter.java b/leveldb/src/main/java/org/iq80/leveldb/impl/LogWriter.java index d61febcb..10ea6ea7 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/LogWriter.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/LogWriter.java @@ -18,25 +18,172 @@ package org.iq80.leveldb.impl; import org.iq80.leveldb.util.Slice; +import org.iq80.leveldb.util.SliceInput; +import org.iq80.leveldb.util.SliceOutput; +import org.iq80.leveldb.util.Slices; +import org.iq80.leveldb.util.WritableFile; -import java.io.File; +import java.io.Closeable; import java.io.IOException; +import java.util.concurrent.atomic.AtomicBoolean; -public interface LogWriter +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; +import static java.util.Objects.requireNonNull; +import static org.iq80.leveldb.impl.LogConstants.BLOCK_SIZE; +import static org.iq80.leveldb.impl.LogConstants.HEADER_SIZE; +import static org.iq80.leveldb.impl.Logs.getChunkChecksum; + +public class LogWriter + implements Closeable { - boolean isClosed(); + private static final byte[] SA = new byte[HEADER_SIZE]; + private final WritableFile writableFile; + private final long fileNumber; + private final AtomicBoolean closed = new AtomicBoolean(); + + /** + * Current offset in the current block + */ + private int blockOffset; + + private LogWriter(long fileNumber, WritableFile file) + { + requireNonNull(file, "file is null"); + checkArgument(fileNumber >= 0, "fileNumber is negative"); + this.fileNumber = fileNumber; + this.writableFile = file; + } - void close() - throws IOException; + public static LogWriter createWriter(long fileNumber, WritableFile writableFile) + { + return new LogWriter(fileNumber, writableFile); + } - void delete() - throws IOException; + @Override + public void close() + throws IOException + { + closed.set(true); + writableFile.close(); - File getFile(); + } - long getFileNumber(); + public long getFileNumber() + { + return fileNumber; + } // Writes a stream of chunks such that no chunk is split across a block boundary - void addRecord(Slice record, boolean force) - throws IOException; + public void addRecord(Slice record, boolean force) + throws IOException + { + checkState(!closed.get(), "Log has been closed"); + + SliceInput sliceInput = record.input(); + + // used to track first, middle and last blocks + boolean begin = true; + + // Fragment the record int chunks as necessary and write it. Note that if record + // is empty, we still want to iterate once to write a single + // zero-length chunk. + do { + int bytesRemainingInBlock = BLOCK_SIZE - blockOffset; + checkState(bytesRemainingInBlock >= 0); + + // Switch to a new block if necessary + if (bytesRemainingInBlock < HEADER_SIZE) { + if (bytesRemainingInBlock > 0) { + // Fill the rest of the block with zeros + // todo lame... 
need a better way to write zeros + writableFile.append(new Slice(SA, 0, bytesRemainingInBlock)); + } + blockOffset = 0; + bytesRemainingInBlock = BLOCK_SIZE - blockOffset; + } + + // Invariant: we never leave less than HEADER_SIZE bytes available in a block + int bytesAvailableInBlock = bytesRemainingInBlock - HEADER_SIZE; + checkState(bytesAvailableInBlock >= 0); + + // if there are more bytes in the record then there are available in the block, + // fragment the record; otherwise write to the end of the record + boolean end; + int fragmentLength; + if (sliceInput.available() > bytesAvailableInBlock) { + end = false; + fragmentLength = bytesAvailableInBlock; + } + else { + end = true; + fragmentLength = sliceInput.available(); + } + + // determine block type + LogChunkType type; + if (begin && end) { + type = LogChunkType.FULL; + } + else if (begin) { + type = LogChunkType.FIRST; + } + else if (end) { + type = LogChunkType.LAST; + } + else { + type = LogChunkType.MIDDLE; + } + + // write the chunk + writeChunk(type, sliceInput.readBytes(fragmentLength)); + + // we are no longer on the first chunk + begin = false; + } while (sliceInput.isReadable()); + + if (force) { + writableFile.force(); + } + } + + private void writeChunk(LogChunkType type, Slice slice) + throws IOException + { + checkArgument(slice.length() <= 0xffff, "length %s is larger than two bytes", slice.length()); + checkArgument(blockOffset + HEADER_SIZE <= BLOCK_SIZE); + + // create header + Slice header = newLogRecordHeader(type, slice, slice.length()); + + // write the header and the payload + writableFile.append(header); + writableFile.append(slice); + + blockOffset += HEADER_SIZE + slice.length(); + } + + private static Slice newLogRecordHeader(LogChunkType type, Slice slice, int length) + { + int crc = getChunkChecksum(type.getPersistentId(), slice.getRawArray(), slice.getRawOffset(), length); + + // Format the header + Slice header = Slices.allocate(HEADER_SIZE); + SliceOutput sliceOutput = header.output(); + sliceOutput.writeInt(crc); + sliceOutput.writeByte((byte) (length & 0xff)); + sliceOutput.writeByte((byte) (length >>> 8)); + sliceOutput.writeByte((byte) (type.getPersistentId())); + + return header; + } + + @Override + public String toString() + { + return "LogWriter{" + + "writableFile=" + writableFile + + ", fileNumber=" + fileNumber + + '}'; + } } diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/Logs.java b/leveldb/src/main/java/org/iq80/leveldb/impl/Logs.java index 5bf617b8..77996a26 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/Logs.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/Logs.java @@ -17,27 +17,25 @@ */ package org.iq80.leveldb.impl; +import org.iq80.leveldb.util.MMWritableFile; import org.iq80.leveldb.util.PureJavaCrc32C; import org.iq80.leveldb.util.Slice; +import org.iq80.leveldb.util.UnbufferedWritableFile; import java.io.File; import java.io.IOException; public final class Logs { + private static final int PAGE_SIZE = 1024 * 1024; + private Logs() { } - public static LogWriter createLogWriter(File file, long fileNumber) - throws IOException + public static LogWriter createLogWriter(File file, long fileNumber, boolean allowMmapWrites) throws IOException { - if (Iq80DBFactory.USE_MMAP) { - return new MMapLogWriter(file, fileNumber); - } - else { - return new FileChannelLogWriter(file, fileNumber); - } + return LogWriter.createWriter(fileNumber, allowMmapWrites ? 
MMWritableFile.open(file, PAGE_SIZE) : UnbufferedWritableFile.open(file)); } public static int getChunkChecksum(int chunkTypeId, Slice slice) diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/MMapLogWriter.java b/leveldb/src/main/java/org/iq80/leveldb/impl/MMapLogWriter.java deleted file mode 100755 index b42e3ce3..00000000 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/MMapLogWriter.java +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (C) 2011 the original author or authors. - * See the notice.md file distributed with this work for additional - * information regarding copyright ownership. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.iq80.leveldb.impl; - -import org.iq80.leveldb.util.ByteBufferSupport; -import org.iq80.leveldb.util.Closeables; -import org.iq80.leveldb.util.Slice; -import org.iq80.leveldb.util.SliceInput; -import org.iq80.leveldb.util.SliceOutput; -import org.iq80.leveldb.util.Slices; - -import java.io.File; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.MappedByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.channels.FileChannel.MapMode; -import java.util.concurrent.atomic.AtomicBoolean; - -import static com.google.common.base.Preconditions.checkArgument; -import static com.google.common.base.Preconditions.checkState; -import static java.util.Objects.requireNonNull; -import static org.iq80.leveldb.impl.LogConstants.BLOCK_SIZE; -import static org.iq80.leveldb.impl.LogConstants.HEADER_SIZE; -import static org.iq80.leveldb.impl.Logs.getChunkChecksum; - -public class MMapLogWriter - implements LogWriter -{ - private static final int PAGE_SIZE = 1024 * 1024; - - private final File file; - private final long fileNumber; - private final FileChannel fileChannel; - private final AtomicBoolean closed = new AtomicBoolean(); - private MappedByteBuffer mappedByteBuffer; - private long fileOffset; - /** - * Current offset in the current block - */ - private int blockOffset; - - public MMapLogWriter(File file, long fileNumber) - throws IOException - { - requireNonNull(file, "file is null"); - checkArgument(fileNumber >= 0, "fileNumber is negative"); - this.file = file; - this.fileNumber = fileNumber; - this.fileChannel = new RandomAccessFile(file, "rw").getChannel(); - mappedByteBuffer = fileChannel.map(MapMode.READ_WRITE, 0, PAGE_SIZE); - } - - @Override - public boolean isClosed() - { - return closed.get(); - } - - @Override - public synchronized void close() - throws IOException - { - closed.set(true); - - destroyMappedByteBuffer(); - - if (fileChannel.isOpen()) { - fileChannel.truncate(fileOffset); - } - - // close the channel - Closeables.closeQuietly(fileChannel); - } - - @Override - public synchronized void delete() - throws IOException - { - close(); - - // try to delete the file - file.delete(); - } - - private void destroyMappedByteBuffer() - { - if (mappedByteBuffer != null) { - fileOffset += mappedByteBuffer.position(); - unmap(); - } - mappedByteBuffer = null; - } - - @Override - 
public File getFile() - { - return file; - } - - @Override - public long getFileNumber() - { - return fileNumber; - } - - // Writes a stream of chunks such that no chunk is split across a block boundary - @Override - public synchronized void addRecord(Slice record, boolean force) - throws IOException - { - checkState(!closed.get(), "Log has been closed"); - - SliceInput sliceInput = record.input(); - - // used to track first, middle and last blocks - boolean begin = true; - - // Fragment the record int chunks as necessary and write it. Note that if record - // is empty, we still want to iterate once to write a single - // zero-length chunk. - do { - int bytesRemainingInBlock = BLOCK_SIZE - blockOffset; - checkState(bytesRemainingInBlock >= 0); - - // Switch to a new block if necessary - if (bytesRemainingInBlock < HEADER_SIZE) { - if (bytesRemainingInBlock > 0) { - // Fill the rest of the block with zeros - // todo lame... need a better way to write zeros - ensureCapacity(bytesRemainingInBlock); - mappedByteBuffer.put(new byte[bytesRemainingInBlock]); - } - blockOffset = 0; - bytesRemainingInBlock = BLOCK_SIZE - blockOffset; - } - - // Invariant: we never leave less than HEADER_SIZE bytes available in a block - int bytesAvailableInBlock = bytesRemainingInBlock - HEADER_SIZE; - checkState(bytesAvailableInBlock >= 0); - - // if there are more bytes in the record then there are available in the block, - // fragment the record; otherwise write to the end of the record - boolean end; - int fragmentLength; - if (sliceInput.available() > bytesAvailableInBlock) { - end = false; - fragmentLength = bytesAvailableInBlock; - } - else { - end = true; - fragmentLength = sliceInput.available(); - } - - // determine block type - LogChunkType type; - if (begin && end) { - type = LogChunkType.FULL; - } - else if (begin) { - type = LogChunkType.FIRST; - } - else if (end) { - type = LogChunkType.LAST; - } - else { - type = LogChunkType.MIDDLE; - } - - // write the chunk - writeChunk(type, sliceInput.readBytes(fragmentLength)); - - // we are no longer on the first chunk - begin = false; - } while (sliceInput.isReadable()); - - if (force) { - mappedByteBuffer.force(); - } - } - - private void writeChunk(LogChunkType type, Slice slice) - throws IOException - { - checkArgument(slice.length() <= 0xffff, "length %s is larger than two bytes", slice.length()); - checkArgument(blockOffset + HEADER_SIZE <= BLOCK_SIZE); - - // create header - Slice header = newLogRecordHeader(type, slice); - - // write the header and the payload - ensureCapacity(header.length() + slice.length()); - header.getBytes(0, mappedByteBuffer); - slice.getBytes(0, mappedByteBuffer); - - blockOffset += HEADER_SIZE + slice.length(); - } - - private void ensureCapacity(int bytes) - throws IOException - { - if (mappedByteBuffer.remaining() < bytes) { - // remap - fileOffset += mappedByteBuffer.position(); - unmap(); - - mappedByteBuffer = fileChannel.map(MapMode.READ_WRITE, fileOffset, PAGE_SIZE); - } - } - - private void unmap() - { - ByteBufferSupport.unmap(mappedByteBuffer); - } - - private static Slice newLogRecordHeader(LogChunkType type, Slice slice) - { - int crc = getChunkChecksum(type.getPersistentId(), slice.getRawArray(), slice.getRawOffset(), slice.length()); - - // Format the header - Slice header = Slices.allocate(HEADER_SIZE); - SliceOutput sliceOutput = header.output(); - sliceOutput.writeInt(crc); - sliceOutput.writeByte((byte) (slice.length() & 0xff)); - sliceOutput.writeByte((byte) (slice.length() >>> 8)); - 
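// Illustrative sketch, not part of the patch: the header being assembled around this point
// (in the deleted MMapLogWriter, and identically in the new LogWriter earlier in this diff)
// is the 7-byte log chunk header: a 4-byte CRC32C over the chunk type id and payload, a
// 2-byte little-endian length, and a 1-byte chunk type. Standalone, with invented names;
// the explicit little-endian order assumes it mirrors what SliceOutput writes:
import java.nio.ByteBuffer;
import java.nio.ByteOrder;

final class LogChunkHeaderSketch
{
    static final int HEADER_SIZE = 7; // 4 (crc32c) + 2 (length) + 1 (chunk type)

    static byte[] encodeHeader(int crc, int length, byte chunkTypeId)
    {
        if (length > 0xffff) {
            throw new IllegalArgumentException("length does not fit in two bytes");
        }
        ByteBuffer header = ByteBuffer.allocate(HEADER_SIZE).order(ByteOrder.LITTLE_ENDIAN);
        header.putInt(crc);                 // checksum covers the type id and the payload
        header.put((byte) (length & 0xff)); // low length byte
        header.put((byte) (length >>> 8));  // high length byte
        header.put(chunkTypeId);            // FULL, FIRST, MIDDLE or LAST
        return header.array();
    }
}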
sliceOutput.writeByte((byte) (type.getPersistentId())); - - return header; - } -} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/SeekingIterator.java b/leveldb/src/main/java/org/iq80/leveldb/impl/SeekingIterator.java index 2d247bf2..9d60ad28 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/SeekingIterator.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/SeekingIterator.java @@ -30,7 +30,7 @@ public interface SeekingIterator void seekToFirst(); /** - * Repositions the iterator so the key of the next BlockElement returned greater than or equal to the specified targetKey. + * Repositions the iterator so the key of the next element returned is greater than or equal to the specified targetKey. */ void seek(K targetKey); } diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotImpl.java b/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotImpl.java deleted file mode 100644 index 8e6fb368..00000000 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotImpl.java +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (C) 2011 the original author or authors. - * See the notice.md file distributed with this work for additional - * information regarding copyright ownership. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.iq80.leveldb.impl; - -import org.iq80.leveldb.Snapshot; - -import java.util.concurrent.atomic.AtomicBoolean; - -public class SnapshotImpl - implements Snapshot -{ - private final AtomicBoolean closed = new AtomicBoolean(); - private final Version version; - private final long lastSequence; - - SnapshotImpl(Version version, long lastSequence) - { - this.version = version; - this.lastSequence = lastSequence; - this.version.retain(); - } - - @Override - public void close() - { - // This is an end user API.. he might screw up and close multiple times. - // but we don't want the version reference count going bad. 
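// Illustrative sketch, not part of the patch: the deleted comment above explains why the
// close() just below guards version.release() with compareAndSet. Snapshot.close() is an
// end-user API and may be called repeatedly, but the reference count must drop exactly once.
// The same idempotent-close pattern in isolation (names invented):
import java.util.concurrent.atomic.AtomicBoolean;

final class IdempotentCloseSketch implements AutoCloseable
{
    private final AtomicBoolean closed = new AtomicBoolean();
    private final Runnable releaseOnce; // e.g. version::release in SnapshotImpl

    IdempotentCloseSketch(Runnable releaseOnce)
    {
        this.releaseOnce = releaseOnce;
    }

    @Override
    public void close()
    {
        // the first close wins; later calls are harmless no-ops
        if (closed.compareAndSet(false, true)) {
            releaseOnce.run();
        }
    }
}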
- if (closed.compareAndSet(false, true)) { - this.version.release(); - } - } - - public long getLastSequence() - { - return lastSequence; - } - - public Version getVersion() - { - return version; - } - - @Override - public String toString() - { - return Long.toString(lastSequence); - } - - @Override - public boolean equals(Object o) - { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - - SnapshotImpl snapshot = (SnapshotImpl) o; - - if (lastSequence != snapshot.lastSequence) { - return false; - } - if (!version.equals(snapshot.version)) { - return false; - } - - return true; - } - - @Override - public int hashCode() - { - int result = version.hashCode(); - result = 31 * result + (int) (lastSequence ^ (lastSequence >>> 32)); - return result; - } -} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotList.java b/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotList.java new file mode 100644 index 00000000..8fd89ca9 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotList.java @@ -0,0 +1,132 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.impl; + +import org.iq80.leveldb.Snapshot; + +import java.util.concurrent.locks.ReentrantLock; + +import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Preconditions.checkState; + +/** + * Snapshots are kept in a doubly-linked list in the DB. + * Each Snapshot corresponds to a particular sequence number. + */ +final class SnapshotList +{ + private final ReentrantLock mutex; + private final SnapshotNode list; + + /** + * Snapshot list where all operations are protected by {@code mutex}. + * All {@code mutex} acquisitions must be done externally to ensure sequence order. + * + * @param mutex protect concurrent read/write to this list + */ + public SnapshotList(ReentrantLock mutex) + { + this.mutex = mutex; + this.list = new SnapshotNode(0); + this.list.next = this.list; + this.list.prev = this.list; + } + + /** + * Track a new snapshot for {@code sequence}. 
+ * + * @param sequence the most recent sequence number available + * @return a new tracked snapshot for {@code sequence} + * @throws IllegalStateException if mutex is not held by current thread + */ + public Snapshot newSnapshot(long sequence) + { + checkState(mutex.isHeldByCurrentThread()); + SnapshotNode s = new SnapshotNode(sequence); + s.next = this.list; + s.prev = list.prev; + s.prev.next = s; + s.next.prev = s; + return s; + } + + /** + * Return {@code true} if list is empty + * + * @return {@code true} if list is empty + * @throws IllegalStateException if mutex is not held by current thread + */ + public boolean isEmpty() + { + checkState(mutex.isHeldByCurrentThread()); + return list.next == list; } + + /** + * Return the oldest sequence number of this list + * + * @return oldest sequence number + * @throws IllegalStateException if mutex is not held by current thread or list is empty + */ + public long getOldest() + { + checkState(mutex.isHeldByCurrentThread()); + checkState(!isEmpty()); + return list.next.number; + } + + /** + * Return sequence corresponding to given snapshot. + * + * @param snapshot snapshot to read from + * @return the sequence corresponding to the given snapshot + * @throws IllegalArgumentException if snapshot concrete type does not come from current list + * @throws IllegalStateException if mutex is not held by current thread + */ + public long getSequenceFrom(Snapshot snapshot) + { + checkArgument(snapshot instanceof SnapshotNode); + checkState(mutex.isHeldByCurrentThread()); + return ((SnapshotNode) snapshot).number; + } + + private final class SnapshotNode implements Snapshot + { + private final long number; + private SnapshotNode next; + private SnapshotNode prev; + + private SnapshotNode(long number) + { + this.number = number; + } + + @Override + public void close() + { + mutex.lock(); + try { + this.prev.next = this.next; + this.next.prev = this.prev; + } + finally { + mutex.unlock(); + } + } + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotSeekingIterator.java b/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotSeekingIterator.java index ba4649d7..65b51e10 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotSeekingIterator.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/SnapshotSeekingIterator.java @@ -26,83 +26,106 @@ import java.util.Map.Entry; public final class SnapshotSeekingIterator - extends AbstractSeekingIterator<Slice, Slice> + extends AbstractSeekingIterator<Slice, Slice> implements AutoCloseable { private final DbIterator iterator; - private final SnapshotImpl snapshot; + private final long sequence; private final Comparator<Slice> userComparator; - public SnapshotSeekingIterator(DbIterator iterator, SnapshotImpl snapshot, Comparator<Slice> userComparator) + public SnapshotSeekingIterator(DbIterator iterator, long sequence, Comparator<Slice> userComparator) { this.iterator = iterator; - this.snapshot = snapshot; + this.sequence = sequence; this.userComparator = userComparator; - this.snapshot.getVersion().retain(); } + @Override public void close() { - this.snapshot.getVersion().release(); + next = null; + iterator.close(); } @Override protected void seekToFirstInternal() { + next = null; iterator.seekToFirst(); - findNextUserEntry(null); + findNextUserEntry(); } @Override protected void seekInternal(Slice targetKey) { - iterator.seek(new InternalKey(targetKey, snapshot.getLastSequence(), ValueType.VALUE)); - findNextUserEntry(null); + next = null; + iterator.seek(new InternalKey(targetKey, sequence, ValueType.VALUE)); + 
findNextUserEntry(); } @Override protected Entry getNextElement() { - if (!iterator.hasNext()) { + if (this.next == null && !iterator.hasNext()) { return null; } - - Entry next = iterator.next(); - // find the next user entry after the key we are about to return - findNextUserEntry(next.getKey().getUserKey()); - - return Maps.immutableEntry(next.getKey().getUserKey(), next.getValue()); + findNextUserEntry(); + if (next != null) { + Entry next = this.next; + this.next = null; + return Maps.immutableEntry(next.getKey().getUserKey(), next.getValue()); + } + return null; } - private void findNextUserEntry(Slice deletedKey) + Entry next; + + private void findNextUserEntry() { + if (next != null) { + return; + } // if there are no more entries, we are done if (!iterator.hasNext()) { return; } - - do { - // Peek the next entry and parse the key - InternalKey internalKey = iterator.peek().getKey(); - + //todo optimize algorithm. we should not do early load when called from #seekX(y) + while (iterator.hasNext()) { + Entry next = iterator.next(); + InternalKey key = next.getKey(); // skip entries created after our snapshot - if (internalKey.getSequenceNumber() > snapshot.getLastSequence()) { - iterator.next(); + if (key.getSequenceNumber() > sequence) { continue; } - - // if the next entry is a deletion, skip all subsequent entries for that key - if (internalKey.getValueType() == ValueType.DELETION) { - deletedKey = internalKey.getUserKey(); + if (key.getValueType() == ValueType.DELETION) { + while (iterator.hasNext()) { + Entry peek = iterator.peek(); + if (peek.getKey().getValueType() == ValueType.DELETION) { + break; //handled by next loop + } + else if (peek.getKey().getValueType() == ValueType.VALUE && userComparator.compare(key.getUserKey(), peek.getKey().getUserKey()) == 0) { + iterator.next(); // Entry hidden + } + else { + break; //different key + } + } } - else if (internalKey.getValueType() == ValueType.VALUE) { - // is this value masked by a prior deletion record? 
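// Illustrative sketch, not part of the patch: the rule the rewritten findNextUserEntry()
// implements. An entry is visible to an iterator opened at snapshot sequence S only if its own
// sequence is <= S, and the newest visible entry for each user key decides the outcome: a VALUE
// is returned, a DELETION hides the key. Input is assumed sorted by (userKey asc, sequence
// desc), matching the internal key order; the types below are invented (Java 16+ records):
import java.util.ArrayList;
import java.util.List;

final class SnapshotVisibilitySketch
{
    enum Type { VALUE, DELETION }

    record Entry(String userKey, long sequence, Type type, String value) {}

    static List<Entry> visibleAt(List<Entry> sorted, long snapshotSequence)
    {
        List<Entry> result = new ArrayList<>();
        String decidedKey = null;
        for (Entry e : sorted) {
            if (e.sequence() > snapshotSequence) {
                continue; // written after the snapshot was taken
            }
            if (e.userKey().equals(decidedKey)) {
                continue; // shadowed by a newer visible entry for the same key
            }
            decidedKey = e.userKey(); // the first visible entry for this key decides
            if (e.type() == Type.VALUE) {
                result.add(e); // a visible DELETION simply hides the key
            }
        }
        return result;
    }
}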
- if (deletedKey == null || userComparator.compare(internalKey.getUserKey(), deletedKey) > 0) { - return; + else if (key.getValueType() == ValueType.VALUE) { + while (iterator.hasNext()) { + Entry peek = iterator.peek(); + if (peek.getKey().getValueType() == ValueType.VALUE && userComparator.compare(key.getUserKey(), peek.getKey().getUserKey()) == 0) { + iterator.next(); // Entry hidden + } + else { + this.next = next; + return; + } } + this.next = next; + return; } - iterator.next(); - } while (iterator.hasNext()); + } } @Override @@ -110,7 +133,7 @@ public String toString() { final StringBuilder sb = new StringBuilder(); sb.append("SnapshotSeekingIterator"); - sb.append("{snapshot=").append(snapshot); + sb.append("{sequence=").append(sequence); sb.append(", iterator=").append(iterator); sb.append('}'); return sb.toString(); diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/TableCache.java b/leveldb/src/main/java/org/iq80/leveldb/impl/TableCache.java index 34b5055e..f25ff407 100755 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/TableCache.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/TableCache.java @@ -22,18 +22,24 @@ import com.google.common.cache.LoadingCache; import com.google.common.cache.RemovalListener; import com.google.common.cache.RemovalNotification; -import org.iq80.leveldb.table.FileChannelTable; -import org.iq80.leveldb.table.MMapTable; +import org.iq80.leveldb.Options; +import org.iq80.leveldb.table.BlockHandle; +import org.iq80.leveldb.table.BlockHandleSliceWeigher; +import org.iq80.leveldb.table.FilterPolicy; +import org.iq80.leveldb.table.KeyValueFunction; import org.iq80.leveldb.table.Table; import org.iq80.leveldb.table.UserComparator; +import org.iq80.leveldb.util.Closeables; +import org.iq80.leveldb.util.UnbufferedRandomInputFile; import org.iq80.leveldb.util.Finalizer; import org.iq80.leveldb.util.InternalTableIterator; +import org.iq80.leveldb.util.LRUCache; +import org.iq80.leveldb.util.MMRandomInputFile; +import org.iq80.leveldb.util.RandomInputFile; import org.iq80.leveldb.util.Slice; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; -import java.nio.channels.FileChannel; import java.util.concurrent.ExecutionException; import static java.util.Objects.requireNonNull; @@ -42,11 +48,15 @@ public class TableCache { private final LoadingCache cache; private final Finalizer finalizer = new Finalizer<>(1); + private final LRUCache blockCache; - public TableCache(final File databaseDir, int tableCacheSize, final UserComparator userComparator, final boolean verifyChecksums) + public TableCache(final File databaseDir, + int tableCacheSize, + final UserComparator userComparator, + final Options options) { requireNonNull(databaseDir, "databaseName is null"); - + blockCache = new LRUCache<>(options.cacheSize() > 0 ? (int) options.cacheSize() : 8 << 20, new BlockHandleSliceWeigher()); //TODO add possibility to disable cache? 
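// Illustrative sketch, not part of the patch: the block cache created above is bounded by
// weight in bytes rather than by entry count, defaulting to 8 << 20 (8 MiB) when
// options.cacheSize() is unset, with BlockHandleSliceWeigher charging a fixed overhead plus
// the block's backing array. Assuming the project's LRUCache wraps Guava, the same sizing
// expressed directly on Guava's CacheBuilder (names invented):
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

final class BlockCacheSketch
{
    static Cache<Long, byte[]> newBlockCache(long cacheSizeBytes)
    {
        long capacity = cacheSizeBytes > 0 ? cacheSizeBytes : 8 << 20; // default 8 MiB
        return CacheBuilder.newBuilder()
                .maximumWeight(capacity)
                // approximate per-entry cost: fixed overhead + payload size
                .weigher((Long blockHandle, byte[] block) -> 64 + block.length)
                .build();
    }
}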
cache = CacheBuilder.newBuilder() .maximumSize(tableCacheSize) .removalListener(new RemovalListener() { @@ -54,8 +64,11 @@ public TableCache(final File databaseDir, int tableCacheSize, final UserComparat @Override public void onRemoval(RemovalNotification notification) { - Table table = notification.getValue().getTable(); - finalizer.addCleanup(table, table.closer()); + final TableAndFile value = notification.getValue(); + if (value != null) { + final Table table = value.getTable(); + finalizer.addCleanup(table, table.closer()); + } } }) .build(new CacheLoader() { @Override public TableAndFile load(Long fileNumber) throws IOException { - return new TableAndFile(databaseDir, fileNumber, userComparator, verifyChecksums); + return new TableAndFile(databaseDir, fileNumber, userComparator, options, blockCache); } }); } @@ -79,6 +92,13 @@ public InternalTableIterator newIterator(long number) return new InternalTableIterator(getTable(number).iterator()); } + public <T> T get(Slice key, FileMetaData fileMetaData, KeyValueFunction<T> resultBuilder) + { + final Table table = getTable(fileMetaData.getNumber()); + return table.internalGet(key, resultBuilder); + + } + public long getApproximateOffsetOf(FileMetaData file, Slice key) { return getTable(file.getNumber()).getApproximateOffsetOf(key); @@ -115,20 +135,41 @@ private static final class TableAndFile { private final Table table; - private TableAndFile(File databaseDir, long fileNumber, UserComparator userComparator, boolean verifyChecksums) + private TableAndFile(File databaseDir, long fileNumber, UserComparator userComparator, Options options, LRUCache blockCache) throws IOException { - String tableFileName = Filename.tableFileName(fileNumber); - File tableFile = new File(databaseDir, tableFileName); - try (FileInputStream fis = new FileInputStream(tableFile); - FileChannel fileChannel = fis.getChannel()) { - if (Iq80DBFactory.USE_MMAP) { - table = new MMapTable(tableFile.getAbsolutePath(), fileChannel, userComparator, verifyChecksums); + final File tableFile = tableFileName(databaseDir, fileNumber); + RandomInputFile source = null; + try { + if (options.allowMmapReads()) { + source = MMRandomInputFile.open(tableFile); } else { - table = new FileChannelTable(tableFile.getAbsolutePath(), fileChannel, userComparator, verifyChecksums); + source = UnbufferedRandomInputFile.open(tableFile); + } + final FilterPolicy filterPolicy = (FilterPolicy) options.filterPolicy(); + table = new Table(source, userComparator, + options.verifyChecksums(), blockCache, filterPolicy); + } + catch (IOException e) { + Closeables.closeQuietly(source); + throw e; + } + } + + private File tableFileName(File databaseDir, long fileNumber) + { + final String tableFileName = Filename.tableFileName(fileNumber); + File tableFile = new File(databaseDir, tableFileName); + if (!tableFile.canRead()) { + // attempt to open older .sst extension + final String sstFileName = Filename.sstTableFileName(fileNumber); + final File sstPath = new File(databaseDir, sstFileName); + if (sstPath.canRead()) { + tableFile = sstPath; + } + } + return tableFile; } public Table getTable() diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/ValueHolder.java b/leveldb/src/main/java/org/iq80/leveldb/impl/ValueHolder.java new file mode 100644 index 00000000..38299f93 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/ValueHolder.java @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2011 the original author or authors. 
+ * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.impl; + +import java.util.Objects; + +/** + * Value holder used to emulate C++-style modification through a reference + */ +final class ValueHolder<V> +{ + private V value; + + public ValueHolder(V value) + { + this.value = value; + } + + /** + * Setter for property 'value'. + * + * @param value Value to set for property 'value'. + */ + public void setValue(V value) + { + this.value = value; + } + + /** + * Getter for property 'value'. + * + * @return Value for property 'value'. + */ + public V getValue() + { + return value; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ValueHolder<?> that = (ValueHolder<?>) o; + return Objects.equals(value, that.value); + } + + @Override + public int hashCode() + { + return Objects.hash(value); + } + + @Override + public String toString() + { + return "ValueHolder{value=" + value + '}'; + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/Version.java b/leveldb/src/main/java/org/iq80/leveldb/impl/Version.java index 818542c5..10c167ac 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/Version.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/Version.java @@ -22,8 +22,6 @@ import com.google.common.collect.ImmutableMultimap; import com.google.common.collect.Multimap; import org.iq80.leveldb.util.InternalIterator; -import org.iq80.leveldb.util.InternalTableIterator; -import org.iq80.leveldb.util.LevelIterator; import org.iq80.leveldb.util.MergingIterator; import org.iq80.leveldb.util.Slice; @@ -35,7 +33,6 @@ import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkPositionIndex; import static com.google.common.collect.Ordering.natural; -import static java.util.Objects.requireNonNull; import static org.iq80.leveldb.impl.DbConstants.MAX_MEM_COMPACT_LEVEL; import static org.iq80.leveldb.impl.DbConstants.NUM_LEVELS; import static org.iq80.leveldb.impl.SequenceNumber.MAX_SEQUENCE_NUMBER; @@ -47,7 +44,6 @@ public class Version { private final AtomicInteger retained = new AtomicInteger(1); private final VersionSet versionSet; - private final Level0 level0; private final List levels; // move these mutable fields somewhere else @@ -60,11 +56,8 @@ public Version(VersionSet versionSet) { this.versionSet = versionSet; checkArgument(NUM_LEVELS > 1, "levels must be at least 2"); - - this.level0 = new Level0(new ArrayList(), getTableCache(), getInternalKeyComparator()); - Builder builder = ImmutableList.builder(); - for (int i = 1; i < NUM_LEVELS; i++) { + for (int i = 0; i < NUM_LEVELS; i++) { List files = new ArrayList<>(); builder.add(new Level(i, files, getTableCache(), getInternalKeyComparator())); } @@ -72,17 +65,10 @@ public Version(VersionSet versionSet) } - public void 
assertNoOverlappingFiles() - { - for (int level = 1; level < NUM_LEVELS; level++) { - assertNoOverlappingFiles(level); - } - } - public void assertNoOverlappingFiles(int level) { if (level > 0) { - Collection files = getFiles().asMap().get(level); + Collection files = getFiles(level); if (files != null) { long previousFileNumber = 0; InternalKey previousEnd = null; @@ -111,22 +97,22 @@ public final InternalKeyComparator getInternalKeyComparator() return versionSet.getInternalKeyComparator(); } - public synchronized int getCompactionLevel() + public int getCompactionLevel() { return compactionLevel; } - public synchronized void setCompactionLevel(int compactionLevel) + public void setCompactionLevel(int compactionLevel) { this.compactionLevel = compactionLevel; } - public synchronized double getCompactionScore() + public double getCompactionScore() { return compactionScore; } - public synchronized void setCompactionScore(double compactionScore) + public void setCompactionScore(double compactionScore) { this.compactionScore = compactionScore; } @@ -134,24 +120,13 @@ public synchronized void setCompactionScore(double compactionScore) @Override public MergingIterator iterator() { - Builder builder = ImmutableList.builder(); - builder.add(level0.iterator()); - builder.addAll(getLevelIterators()); - return new MergingIterator(builder.build(), getInternalKeyComparator()); - } - - List getLevel0Files() - { - Builder builder = ImmutableList.builder(); - for (FileMetaData file : level0.getFiles()) { - builder.add(getTableCache().newIterator(file)); - } - return builder.build(); + ImmutableList it = ImmutableList.copyOf(getLevelIterators()); + return new MergingIterator(it, getInternalKeyComparator()); } - List getLevelIterators() + List getLevelIterators() { - Builder builder = ImmutableList.builder(); + Builder builder = ImmutableList.builder(); for (Level level : levels) { if (!level.getFiles().isEmpty()) { builder.add(level.iterator()); @@ -160,22 +135,18 @@ List getLevelIterators() return builder.build(); } - public LookupResult get(LookupKey key) + public LookupResult get(LookupKey key, ReadStats readStats) { // We can search level-by-level since entries never hop across // levels. Therefore we are guaranteed that if we find data // in an smaller level, later levels are irrelevant. - ReadStats readStats = new ReadStats(); - LookupResult lookupResult = level0.get(key, readStats); - if (lookupResult == null) { - for (Level level : levels) { - lookupResult = level.get(key, readStats); - if (lookupResult != null) { - break; - } + LookupResult lookupResult = null; + for (Level level : levels) { + lookupResult = level.get(key, readStats); + if (lookupResult != null) { + break; } } - updateStats(readStats.getSeekFileLevel(), readStats.getSeekFile()); return lookupResult; } @@ -186,7 +157,7 @@ int pickLevelForMemTableOutput(Slice smallestUserKey, Slice largestUserKey) // Push to next level if there is no overlap in next level, // and the #bytes overlapping in the level after that are limited. 
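// Illustrative sketch, not part of the patch: the loop that follows implements the placement
// rule stated in the comment above. With the overlap tests abstracted away, the decision looks
// like this; MAX_MEM_COMPACT_LEVEL is 2 in DbConstants, while the grandparent byte limit and
// all names here are invented for the example:
import java.util.function.IntPredicate;
import java.util.function.IntToLongFunction;

final class MemTableOutputLevelSketch
{
    static int pickLevel(IntPredicate overlapsLevel, IntToLongFunction overlapBytesAtLevel,
            long maxGrandparentOverlapBytes, int maxMemCompactLevel)
    {
        int level = 0;
        while (level < maxMemCompactLevel) {
            if (overlapsLevel.test(level + 1)) {
                break; // next level already contains these keys; must stay here
            }
            if (overlapBytesAtLevel.applyAsLong(level + 2) > maxGrandparentOverlapBytes) {
                break; // pushing further would set up an expensive future compaction
            }
            level++;
        }
        return level;
    }
}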
InternalKey start = new InternalKey(smallestUserKey, MAX_SEQUENCE_NUMBER, ValueType.VALUE); - InternalKey limit = new InternalKey(largestUserKey, 0, ValueType.VALUE); + InternalKey limit = new InternalKey(largestUserKey, 0, ValueType.DELETION); while (level < MAX_MEM_COMPACT_LEVEL) { if (overlapInLevel(level + 1, smallestUserKey, largestUserKey)) { break; @@ -204,37 +175,23 @@ int pickLevelForMemTableOutput(Slice smallestUserKey, Slice largestUserKey) public boolean overlapInLevel(int level, Slice smallestUserKey, Slice largestUserKey) { checkPositionIndex(level, levels.size(), "Invalid level"); - requireNonNull(smallestUserKey, "smallestUserKey is null"); - requireNonNull(largestUserKey, "largestUserKey is null"); - - if (level == 0) { - return level0.someFileOverlapsRange(smallestUserKey, largestUserKey); - } - return levels.get(level - 1).someFileOverlapsRange(smallestUserKey, largestUserKey); + return levels.get(level).someFileOverlapsRange(level > 0, smallestUserKey, largestUserKey); } public int numberOfLevels() { - return levels.size() + 1; + return levels.size(); } public int numberOfFilesInLevel(int level) { - if (level == 0) { - return level0.getFiles().size(); - } - else { - return levels.get(level - 1).getFiles().size(); - } + return getFiles(level).size(); } public Multimap getFiles() { ImmutableMultimap.Builder builder = ImmutableMultimap.builder(); builder = builder.orderKeysBy(natural()); - - builder.putAll(0, level0.getFiles()); - for (Level level : levels) { builder.putAll(level.getLevelNumber(), level.getFiles()); } @@ -243,26 +200,19 @@ public Multimap getFiles() public List getFiles(int level) { - if (level == 0) { - return level0.getFiles(); - } - else { - return levels.get(level - 1).getFiles(); - } + return levels.get(level).getFiles(); } public void addFile(int level, FileMetaData fileMetaData) { - if (level == 0) { - level0.addFile(fileMetaData); - } - else { - levels.get(level - 1).addFile(fileMetaData); - } + levels.get(level).addFile(fileMetaData); } - private boolean updateStats(int seekFileLevel, FileMetaData seekFile) + public boolean updateStats(ReadStats readStats) { + final int seekFileLevel = readStats.getSeekFileLevel(); + final FileMetaData seekFile = readStats.getSeekFile(); + if (seekFile == null) { return false; } diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/VersionSet.java b/leveldb/src/main/java/org/iq80/leveldb/impl/VersionSet.java old mode 100755 new mode 100644 index f554d82d..90ab3e77 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/VersionSet.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/VersionSet.java @@ -28,12 +28,12 @@ import org.iq80.leveldb.util.InternalIterator; import org.iq80.leveldb.util.Level0Iterator; import org.iq80.leveldb.util.MergingIterator; +import org.iq80.leveldb.util.SequentialFile; +import org.iq80.leveldb.util.SequentialFileImpl; import org.iq80.leveldb.util.Slice; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; -import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -47,6 +47,7 @@ import java.util.TreeMap; import java.util.TreeSet; import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.locks.ReentrantLock; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Preconditions.checkState; @@ -77,16 +78,18 @@ public class VersionSet private final File databaseDir; private final TableCache tableCache; private final 
InternalKeyComparator internalKeyComparator; + private final boolean allowMmapWrites; private LogWriter descriptorLog; private final Map compactPointers = new TreeMap<>(); - public VersionSet(File databaseDir, TableCache tableCache, InternalKeyComparator internalKeyComparator) + public VersionSet(File databaseDir, TableCache tableCache, InternalKeyComparator internalKeyComparator, boolean allowMmapWrites) throws IOException { this.databaseDir = databaseDir; this.tableCache = tableCache; this.internalKeyComparator = internalKeyComparator; + this.allowMmapWrites = allowMmapWrites; appendVersion(new Version(this)); initializeIfNeeded(); @@ -104,7 +107,7 @@ private void initializeIfNeeded() edit.setNextFileNumber(nextFileNumber.get()); edit.setLastSequenceNumber(lastSequence); - LogWriter log = Logs.createLogWriter(new File(databaseDir, Filename.descriptorFileName(manifestFileNumber)), manifestFileNumber); + LogWriter log = Logs.createLogWriter(new File(databaseDir, Filename.descriptorFileName(manifestFileNumber)), manifestFileNumber, allowMmapWrites); try { writeSnapshot(log); log.addRecord(edit.encode(), false); @@ -141,7 +144,7 @@ private void appendVersion(Version version) requireNonNull(version, "version is null"); checkArgument(version != current, "version is the current version"); Version previous = current; - current = version; + current = version; //version already retained, create with retained = 1 activeVersions.put(version, new Object()); if (previous != null) { previous.release(); @@ -204,25 +207,20 @@ public MergingIterator makeInputIterator(Compaction c) // TODO(opt): use concatenating iterator for level-0 if there is no overlap List list = new ArrayList<>(); for (int which = 0; which < 2; which++) { - if (!c.getInputs()[which].isEmpty()) { + List files = c.input(which); + if (!files.isEmpty()) { if (c.getLevel() + which == 0) { - List files = c.getInputs()[which]; list.add(new Level0Iterator(tableCache, files, internalKeyComparator)); } else { // Create concatenating iterator for the files from this level - list.add(Level.createLevelConcatIterator(tableCache, c.getInputs()[which], internalKeyComparator)); + list.add(Level.createLevelConcatIterator(tableCache, files, internalKeyComparator)); } } } return new MergingIterator(list, internalKeyComparator); } - public LookupResult get(LookupKey key) - { - return current.get(key); - } - public boolean overlapInLevel(int level, Slice smallestUserKey, Slice largestUserKey) { return current.overlapInLevel(level, smallestUserKey, largestUserKey); @@ -249,7 +247,7 @@ public void setLastSequence(long newLastSequence) this.lastSequence = newLastSequence; } - public void logAndApply(VersionEdit edit) + public void logAndApply(VersionEdit edit, ReentrantLock mutex) throws IOException { if (edit.getLogNumber() != null) { @@ -268,31 +266,38 @@ public void logAndApply(VersionEdit edit) edit.setLastSequenceNumber(lastSequence); Version version = new Version(this); - Builder builder = new Builder(this, current); - builder.apply(edit); - builder.saveTo(version); + try (Builder builder = new Builder(this, current)) { + builder.apply(edit); + builder.saveTo(version); + } finalizeVersion(version); boolean createdNewManifest = false; + final long mFileNumber = manifestFileNumber; try { // Initialize new descriptor log file if necessary by creating // a temporary file that contains a snapshot of the current version. 
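// Illustrative sketch, not part of the patch: the logAndApply() hunk below releases the
// database mutex while the MANIFEST record is flushed and re-acquires it afterwards, so the
// slow disk I/O no longer runs under the lock. The pattern in isolation (names invented):
import java.io.IOException;
import java.util.concurrent.locks.ReentrantLock;

final class UnlockDuringIoSketch
{
    interface SlowIo { void run() throws IOException; }

    static void writeOutsideLock(ReentrantLock mutex, SlowIo manifestWrite) throws IOException
    {
        // caller holds the mutex; only one writer can reach this point at a time
        mutex.unlock();
        try {
            manifestWrite.run(); // disk I/O happens without blocking other threads
        }
        finally {
            mutex.lock(); // always re-acquire, even if the write failed
        }
    }
}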
if (descriptorLog == null) { edit.setNextFileNumber(nextFileNumber.get()); - descriptorLog = Logs.createLogWriter(new File(databaseDir, Filename.descriptorFileName(manifestFileNumber)), manifestFileNumber); + descriptorLog = Logs.createLogWriter(new File(databaseDir, Filename.descriptorFileName(mFileNumber)), mFileNumber, allowMmapWrites); writeSnapshot(descriptorLog); createdNewManifest = true; } - - // Write new record to MANIFEST log - Slice record = edit.encode(); - descriptorLog.addRecord(record, true); - - // If we just created a new descriptor file, install it by writing a - // new CURRENT file that points to it. - if (createdNewManifest) { - Filename.setCurrentFile(databaseDir, descriptorLog.getFileNumber()); + mutex.unlock(); + try { + // Write new record to MANIFEST log + Slice record = edit.encode(); + descriptorLog.addRecord(record, true); + + // If we just created a new descriptor file, install it by writing a + // new CURRENT file that points to it. + if (createdNewManifest) { + Filename.setCurrentFile(databaseDir, mFileNumber); + } + } + finally { + mutex.lock(); } } catch (IOException e) { @@ -300,7 +305,7 @@ public void logAndApply(VersionEdit edit) if (createdNewManifest) { descriptorLog.close(); // todo add delete method to LogWriter - new File(databaseDir, Filename.logFileName(descriptorLog.getFileNumber())).delete(); + new File(databaseDir, Filename.logFileName(mFileNumber)).delete(); descriptorLog = null; } throw e; @@ -343,8 +348,7 @@ public void recover() currentName = currentName.substring(0, currentName.length() - 1); // open file channel - try (FileInputStream fis = new FileInputStream(new File(databaseDir, currentName)); - FileChannel fileChannel = fis.getChannel()) { + try (SequentialFile in = SequentialFileImpl.open(new File(databaseDir, currentName))) { // read log edit log Long nextFileNumber = null; Long lastSequence = null; @@ -352,7 +356,7 @@ public void recover() Long prevLogNumber = null; Builder builder = new Builder(this, current); - LogReader reader = new LogReader(fileChannel, throwExceptionMonitor(), true, 0); + LogReader reader = new LogReader(in, throwExceptionMonitor(), true, 0); for (Slice record = reader.readRecord(); record != null; record = reader.readRecord()) { // read version edit VersionEdit edit = new VersionEdit(record); @@ -394,6 +398,7 @@ public void recover() Version newVersion = new Version(this); builder.saveTo(newVersion); + builder.close(); // Install recovered version finalizeVersion(newVersion); @@ -622,20 +627,40 @@ private Compaction setupOtherInputs(int level, List levelInputs) List getOverlappingInputs(int level, InternalKey begin, InternalKey end) { - ImmutableList.Builder files = ImmutableList.builder(); - Slice userBegin = begin.getUserKey(); - Slice userEnd = end.getUserKey(); + List inputs = new ArrayList<>(); + Slice userBegin = begin == null ? null : begin.getUserKey(); + Slice userEnd = end == null ? 
null : end.getUserKey(); UserComparator userComparator = internalKeyComparator.getUserComparator(); - for (FileMetaData fileMetaData : current.getFiles(level)) { - if (userComparator.compare(fileMetaData.getLargest().getUserKey(), userBegin) < 0 || - userComparator.compare(fileMetaData.getSmallest().getUserKey(), userEnd) > 0) { - // Either completely before or after range; skip it + List filesInLevel = current.getFiles(level); + for (int i = 0; i < filesInLevel.size(); i++) { + FileMetaData fileMetaData = filesInLevel.get(i); + Slice fileStart = fileMetaData.getSmallest().getUserKey(); + Slice fileLimit = fileMetaData.getLargest().getUserKey(); + if (begin != null && userComparator.compare(fileLimit, userBegin) < 0) { + // "files1" is completely before specified range; skip it + } + else if (end != null && userComparator.compare(fileStart, userEnd) > 0) { + // "files1" is completely after specified range; skip it } else { - files.add(fileMetaData); + inputs.add(fileMetaData); + if (level == 0) { + // Level-0 files may overlap each other. So check if the newly + // added file has expanded the range. If so, restart search. + if (begin != null && userComparator.compare(fileStart, userBegin) < 0) { + userBegin = fileStart; + inputs.clear(); + i = -1; + } + else if (end != null && userComparator.compare(fileLimit, userEnd) > 0) { + userEnd = fileLimit; + inputs.clear(); + i = -1; + } + } } } - return files.build(); + return inputs; } private Entry getRange(List... inputLists) @@ -682,7 +707,7 @@ public long getMaxNextLevelOverlappingBytes() * of edits to a particular state without creating intermediate * Versions that contain full copies of the intermediate state. */ - private static class Builder + private static class Builder implements AutoCloseable { private final VersionSet versionSet; private final Version baseVersion; @@ -692,6 +717,7 @@ private Builder(VersionSet versionSet, Version baseVersion) { this.versionSet = versionSet; this.baseVersion = baseVersion; + baseVersion.retain(); levels = new ArrayList<>(baseVersion.numberOfLevels()); for (int i = 0; i < baseVersion.numberOfLevels(); i++) { @@ -759,7 +785,7 @@ public void saveTo(Version version) // Merge the set of added files with the set of pre-existing files. // Drop any deleted files. Store the result in *v. - Collection baseFiles = baseVersion.getFiles().asMap().get(level); + Collection baseFiles = baseVersion.getFiles(level); if (baseFiles == null) { baseFiles = ImmutableList.of(); } @@ -780,7 +806,7 @@ public void saveTo(Version version) //#ifndef NDEBUG todo // Make sure there is no overlap in levels > 0 - version.assertNoOverlappingFiles(); + version.assertNoOverlappingFiles(level); //#endif } } @@ -809,6 +835,12 @@ private void maybeAddFile(Version version, int level, FileMetaData fileMetaData) } } + @Override + public void close() + { + baseVersion.release(); + } + private static class FileMetaDataBySmallestKey implements Comparator { diff --git a/leveldb/src/main/java/org/iq80/leveldb/impl/WriteBatchImpl.java b/leveldb/src/main/java/org/iq80/leveldb/impl/WriteBatchImpl.java index 003a3cad..640aa222 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/impl/WriteBatchImpl.java +++ b/leveldb/src/main/java/org/iq80/leveldb/impl/WriteBatchImpl.java @@ -31,7 +31,11 @@ public class WriteBatchImpl implements WriteBatch { + // WriteBatch header has an 8-byte sequence number followed by a 4-byte count. 
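// Illustrative sketch, not part of the patch: the header noted above totals 12 bytes (8-byte
// sequence number + 4-byte count), which the on-disk encoding writes once per batch;
// approximateSize below charges it once per entry as a rough stand-in for per-entry overhead,
// presumably what the TODO in this hunk refers to. An accounting closer to the encoding
// (varint length prefixes approximated as a fixed 5 bytes; names invented):
final class WriteBatchSizeSketch
{
    private static final int BATCH_HEADER_SIZE = 12; // sequence (8) + count (4)

    static long approximateSize(byte[][][] entries) // each entry: {key, value}
    {
        long size = BATCH_HEADER_SIZE; // charged once per batch
        for (byte[][] entry : entries) {
            // 1 tag byte + length prefixes + payloads
            size += 1 + 5 + entry[0].length + 5 + entry[1].length;
        }
        return size;
    }
}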
+ private static final int HEADER_SIZE = 12; + private final List> batch = new ArrayList<>(); + //TODO fix this count that is wrong!!! private int approximateSize; public int getApproximateSize() @@ -50,7 +54,7 @@ public WriteBatchImpl put(byte[] key, byte[] value) requireNonNull(key, "key is null"); requireNonNull(value, "value is null"); batch.add(Maps.immutableEntry(Slices.wrappedBuffer(key), Slices.wrappedBuffer(value))); - approximateSize += 12 + key.length + value.length; + approximateSize += HEADER_SIZE + key.length + value.length; return this; } @@ -59,7 +63,7 @@ public WriteBatchImpl put(Slice key, Slice value) requireNonNull(key, "key is null"); requireNonNull(value, "value is null"); batch.add(Maps.immutableEntry(key, value)); - approximateSize += 12 + key.length() + value.length(); + approximateSize += HEADER_SIZE + key.length() + value.length(); return this; } @@ -99,6 +103,18 @@ public void forEach(Handler handler) } } + public void append(WriteBatchImpl batch) + { + this.batch.addAll(batch.batch); + this.approximateSize += batch.approximateSize; + } + + public void clear() + { + approximateSize = 0; + batch.clear(); + } + public interface Handler { void put(Slice key, Slice value); diff --git a/leveldb/src/test/java/org/iq80/leveldb/table/FileChannelTableTest.java b/leveldb/src/main/java/org/iq80/leveldb/table/BlockHandleSliceWeigher.java similarity index 67% rename from leveldb/src/test/java/org/iq80/leveldb/table/FileChannelTableTest.java rename to leveldb/src/main/java/org/iq80/leveldb/table/BlockHandleSliceWeigher.java index 99c6021c..92043c39 100644 --- a/leveldb/src/test/java/org/iq80/leveldb/table/FileChannelTableTest.java +++ b/leveldb/src/main/java/org/iq80/leveldb/table/BlockHandleSliceWeigher.java @@ -15,21 +15,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ + package org.iq80.leveldb.table; -import org.iq80.leveldb.util.Slice; +import com.google.common.cache.Weigher; -import java.io.IOException; -import java.nio.channels.FileChannel; -import java.util.Comparator; +import org.iq80.leveldb.util.Slice; -public class FileChannelTableTest - extends TableTest +/** + * @author Honore Vasconcelos + */ +public class BlockHandleSliceWeigher implements Weigher { @Override - protected Table createTable(String name, FileChannel fileChannel, Comparator comparator, boolean verifyChecksums) - throws IOException + public int weigh(BlockHandle key, Slice value) { - return new FileChannelTable(name, fileChannel, comparator, verifyChecksums); + //approximate weigher + return 64 + value.getRawArray().length; } } diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/BlockIterator.java b/leveldb/src/main/java/org/iq80/leveldb/table/BlockIterator.java index f91dd36f..e9b2d136 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/table/BlockIterator.java +++ b/leveldb/src/main/java/org/iq80/leveldb/table/BlockIterator.java @@ -112,6 +112,16 @@ public void seekToFirst() } } + public void seekToLast() + { + if (restartCount > 0) { + seekToRestartPosition(restartCount - 1); + while (peek() != null) { + next(); + } + } + } + /** * Repositions the iterator so the key of the next BlockElement returned greater than or equal to the specified targetKey. 
*/ @@ -145,7 +155,7 @@ public void seek(Slice targetKey) // linear search (within restart block) for first key greater than or equal to targetKey for (seekToRestartPosition(left); nextEntry != null; next()) { - if (comparator.compare(peek().getKey(), targetKey) >= 0) { + if (comparator.compare(nextEntry.getKey(), targetKey) >= 0) { break; } } diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/BloomFilterPolicy.java b/leveldb/src/main/java/org/iq80/leveldb/table/BloomFilterPolicy.java new file mode 100644 index 00000000..da71c87d --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/table/BloomFilterPolicy.java @@ -0,0 +1,135 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.iq80.leveldb.table; + +import org.iq80.leveldb.XFilterPolicy; +import org.iq80.leveldb.util.Hash; +import org.iq80.leveldb.util.Slice; + +import java.util.List; + +/** + * BloomFilter policy + * + * @author Honore Vasconcelos + * @link https://github.com/google/leveldb/commit/85584d497e7b354853b72f450683d59fcf6b9c5c + */ +public final class BloomFilterPolicy implements org.iq80.leveldb.table.FilterPolicy, XFilterPolicy +{ + private final int bitsPerKey; + private final int k; + + public BloomFilterPolicy(final int bitsPerKey) + { + this.bitsPerKey = bitsPerKey; + int k = (int) (bitsPerKey * 0.69); + if (k < 1) { + k = 1; + } + else if (k > 30) { + k = 30; + } + this.k = k; + } + + @Override + public String name() + { + return "leveldb.BuiltinBloomFilter2"; + } + + @Override + public byte[] createFilter(List keys) + { + // Compute bloom filter size (in both bits and bytes) + int bits = keys.size() * bitsPerKey; + + // For small n, we can see a very high false positive rate. Fix it + // by enforcing a minimum bloom filter length. + if (bits < 64) { + bits = 64; + } + + int bytes = (bits + 7) / 8; + bits = bytes * 8; + + final byte[] array = new byte[bytes + 1]; + array[array.length - 1] = (byte) k; // Remember # of probes in filter + + for (Slice key : keys) { + // Use double-hashing to generate a sequence of hash values. + // See analysis in [Kirsch,Mitzenmacher 2006]. 
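// Illustrative sketch, not part of the patch: the loop below uses double hashing, deriving the
// k probe positions g_j = h + j * delta from a single base hash instead of computing k
// independent hashes; k itself was chosen above as bitsPerKey * ln(2) (the 0.69 factor), the
// value that minimizes the false-positive rate. The probe-setting step in isolation
// (names invented):
final class DoubleHashingSketch
{
    // set the k probe bits for one key's base hash h in a filter of 'bits' bits
    static void addKey(byte[] filter, int bits, int k, int h)
    {
        int delta = (h >>> 17) | (h << 15); // rotate right 17 bits
        for (int j = 0; j < k; j++) {
            int bitpos = (int) ((h & 0xffffffffL) % bits); // treat h as unsigned
            filter[bitpos / 8] |= (byte) (1 << (bitpos % 8));
            h += delta;
        }
    }
}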
+ int h = bloomHash(key); + int delta = (h >>> 17) | (h << 15); // Rotate right 17 bits + for (int j = 0; j < k; j++) { + int bitpos = (int) ((toLong(h)) % bits); + final int i = bitpos / 8; + array[i] |= (1 << (bitpos % 8)); + h += delta; + } + } + return array; + } + + private int bloomHash(Slice data) + { + return Hash.hash(data.getRawArray(), data.getRawOffset(), data.length(), 0xbc9f1d34); //avoid data copy + } + + @Override + public boolean keyMayMatch(Slice key, Slice bloomFilter1) + { + int len = bloomFilter1.length(); + byte[] data = bloomFilter1.getRawArray(); + int offset = bloomFilter1.getRawOffset(); + if (len < 2) { + return false; + } + + int bits = (len - 1) * 8; + + // Use the encoded k so that we can read filters generated by + // bloom filters created using different parameters. + int k = data[offset + len - 1]; + if (k > 30) { + // Reserved for potentially new encodings for short bloom filters. + // Consider it a match. + return true; + } + + int h = bloomHash(key); + int delta = (h >>> 17) | (h << 15); // Rotate right 17 bits + for (int j = 0; j < k; j++) { + int bitpos = (int) (toLong(h) % bits); + if ((data[offset + (bitpos / 8)] & (1 << (bitpos % 8))) == 0) { + return false; + } + h += delta; + } + return true; + } + + /** + * Convert an unsigned int into a long + */ + private long toLong(int h) + { + return h & 0xffffffffL; + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/FileChannelTable.java b/leveldb/src/main/java/org/iq80/leveldb/table/FileChannelTable.java deleted file mode 100644 index a5723387..00000000 --- a/leveldb/src/main/java/org/iq80/leveldb/table/FileChannelTable.java +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (C) 2011 the original author or authors. - * See the notice.md file distributed with this work for additional - * information regarding copyright ownership. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.iq80.leveldb.table; - -import org.iq80.leveldb.util.Slice; -import org.iq80.leveldb.util.Slices; -import org.iq80.leveldb.util.Snappy; - -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.util.Comparator; - -import static org.iq80.leveldb.CompressionType.SNAPPY; - -public class FileChannelTable - extends Table -{ - public FileChannelTable(String name, FileChannel fileChannel, Comparator comparator, boolean verifyChecksums) - throws IOException - { - super(name, fileChannel, comparator, verifyChecksums); - } - - @Override - protected Footer init() - throws IOException - { - long size = fileChannel.size(); - ByteBuffer footerData = read(size - Footer.ENCODED_LENGTH, Footer.ENCODED_LENGTH); - return Footer.readFooter(Slices.copiedBuffer(footerData)); - } - - @SuppressWarnings({"AssignmentToStaticFieldFromInstanceMethod", "NonPrivateFieldAccessedInSynchronizedContext"}) - @Override - protected Block readBlock(BlockHandle blockHandle) - throws IOException - { - // read block trailer - ByteBuffer trailerData = read(blockHandle.getOffset() + blockHandle.getDataSize(), BlockTrailer.ENCODED_LENGTH); - BlockTrailer blockTrailer = BlockTrailer.readBlockTrailer(Slices.copiedBuffer(trailerData)); - -// todo re-enable crc check when ported to support direct buffers -// // only verify check sums if explicitly asked by the user -// if (verifyChecksums) { -// // checksum data and the compression type in the trailer -// PureJavaCrc32C checksum = new PureJavaCrc32C(); -// checksum.update(data.getRawArray(), data.getRawOffset(), blockHandle.getDataSize() + 1); -// int actualCrc32c = checksum.getMaskedValue(); -// -// checkState(blockTrailer.getCrc32c() == actualCrc32c, "Block corrupted: checksum mismatch"); -// } - - // decompress data - - ByteBuffer uncompressedBuffer = read(blockHandle.getOffset(), blockHandle.getDataSize()); - Slice uncompressedData; - if (blockTrailer.getCompressionType() == SNAPPY) { - synchronized (FileChannelTable.class) { - int uncompressedLength = uncompressedLength(uncompressedBuffer); - if (uncompressedScratch.capacity() < uncompressedLength) { - uncompressedScratch = ByteBuffer.allocateDirect(uncompressedLength); - } - uncompressedScratch.clear(); - - Snappy.uncompress(uncompressedBuffer, uncompressedScratch); - uncompressedData = Slices.copiedBuffer(uncompressedScratch); - } - } - else { - uncompressedData = Slices.copiedBuffer(uncompressedBuffer); - } - - return new Block(uncompressedData, comparator); - } - - private ByteBuffer read(long offset, int length) - throws IOException - { - ByteBuffer uncompressedBuffer = ByteBuffer.allocate(length); - fileChannel.read(uncompressedBuffer, offset); - if (uncompressedBuffer.hasRemaining()) { - throw new IOException("Could not read all the data"); - } - uncompressedBuffer.clear(); - return uncompressedBuffer; - } -} diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/FilterBlockBuilder.java b/leveldb/src/main/java/org/iq80/leveldb/table/FilterBlockBuilder.java new file mode 100644 index 00000000..00d90bc7 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/table/FilterBlockBuilder.java @@ -0,0 +1,125 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.iq80.leveldb.table; + +import org.iq80.leveldb.util.DynamicSliceOutput; +import org.iq80.leveldb.util.IntVector; +import org.iq80.leveldb.util.Slice; + +import java.util.ArrayList; +import java.util.List; + +import static com.google.common.base.Preconditions.checkArgument; + +/** + * The filter block stores a sequence of filters, where filter i contains + * the output of FilterPolicy::CreateFilter() on all keys that are stored + * in a block whose file offset falls within the range + *
<p>
+ * [ i*base ... (i+1)*base-1 ] + *
<p>
+ * Currently, "base" is 2KB. So for example, if blocks X and Y start in + * the range [ 0KB .. 2KB-1 ], all of the keys in X and Y will be + * converted to a filter by calling FilterPolicy::CreateFilter(), and the + * resulting filter will be stored as the first filter in the filter + * block. + *
<p>
+ * The filter block is formatted as follows: + *
<p>
+ * [filter 0] + * [filter 1] + * [filter 2] + * ... + * [filter N-1] + *
<p>
+ * [offset of filter 0] : 4 bytes + * [offset of filter 1] : 4 bytes + * [offset of filter 2] : 4 bytes + * ... + * [offset of filter N-1] : 4 bytes + *
<p>
+ * [offset of beginning of offset array] : 4 bytes + * lg(base) : 1 byte + *
<p>
+ *
<p>
+ * + * @author Honore Vasconcelos + */ +public class FilterBlockBuilder +{ + // Generate new filter every 2KB of data + private static final byte FILTER_BASE_LG = 11; + private static final int FILTER_BASE = 1 << FILTER_BASE_LG; + + private final List<Slice> keys = new ArrayList<>(); + private final DynamicSliceOutput result = new DynamicSliceOutput(32); + private final IntVector filterOffsets = new IntVector(32); + private final FilterPolicy policy; + + public FilterBlockBuilder(FilterPolicy policy) + { + this.policy = policy; + } + + public void addKey(Slice key) + { + keys.add(key); + } + + public void startBlock(long blockOffset) + { + long filterIndex = blockOffset / FILTER_BASE; + checkArgument(filterIndex >= filterOffsets.size()); + while (filterIndex > filterOffsets.size()) { + generateFilter(); + } + } + + private void generateFilter() + { + final int numberOfKeys = keys.size(); + if (numberOfKeys == 0) { + // Fast path if there are no keys for this filter + filterOffsets.add(result.size()); + return; + } + filterOffsets.add(result.size()); + final byte[] filter = policy.createFilter(keys); + result.writeBytes(filter); + keys.clear(); + } + + public Slice finish() + { + if (!keys.isEmpty()) { + generateFilter(); + } + final int arrayOffset = result.size(); + filterOffsets.write(result); + result.writeInt(arrayOffset); // 4 bytes + result.write(FILTER_BASE_LG); // 1 byte + final Slice slice = result.slice(); + return slice; + } + + public String name() + { + return policy.name(); + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/FilterBlockReader.java b/leveldb/src/main/java/org/iq80/leveldb/table/FilterBlockReader.java new file mode 100644 index 00000000..0bcbabdb --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/table/FilterBlockReader.java @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
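As a quick illustration of the layout described above, here is an editor's sketch (not part of the patch) that drives `FilterBlockBuilder` by hand the same way `TableBuilder` does; it assumes the `BloomFilterPolicy` and `Slice` classes from this repository:

```java
import org.iq80.leveldb.table.BloomFilterPolicy;
import org.iq80.leveldb.table.FilterBlockBuilder;
import org.iq80.leveldb.util.Slice;

import static java.nio.charset.StandardCharsets.UTF_8;

public class FilterBlockSketch
{
    public static void main(String[] args)
    {
        FilterBlockBuilder builder = new FilterBlockBuilder(new BloomFilterPolicy(10));
        builder.startBlock(0);                               // first data block starts at file offset 0
        builder.addKey(new Slice("alpha".getBytes(UTF_8)));
        builder.addKey(new Slice("beta".getBytes(UTF_8)));
        // A block starting at offset 4096 maps to filter index 4096 / 2048 = 2, so this
        // call emits filter 0 (covering "alpha" and "beta") plus an empty filter 1.
        builder.startBlock(4096);
        // finish() appends the filter offset array, its starting position and lg(base).
        Slice filterBlock = builder.finish();
        System.out.println("filter block length: " + filterBlock.length());
    }
}
```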
+ */ + +package org.iq80.leveldb.table; + +import org.iq80.leveldb.util.Slice; + +/** + * @author Honore Vasconcelos + */ +final class FilterBlockReader +{ + private final byte baseLg; + private final int num; + private final Slice contents; + private final int offset; + private final FilterPolicy filterPolicy; + + FilterBlockReader(FilterPolicy filterPolicy, Slice contents) + { + this.filterPolicy = filterPolicy; + final int n = contents.length(); + final int lgAndOffset = 5; + if (n < lgAndOffset) { //byte + int + this.baseLg = 0; + this.contents = null; + this.num = 0; + this.offset = 0; + return; + } + baseLg = contents.getByte(n - 1); + offset = contents.getInt(n - lgAndOffset); + if (offset > n - lgAndOffset) { + this.num = 0; + this.contents = null; + return; + } + num = (n - lgAndOffset - offset) / 4; + this.contents = contents; + } + + public boolean keyMayMatch(long offset1, Slice key) + { + final int index = (int) (offset1 >> baseLg); + if (index < num) { + final int start = contents.getInt(this.offset + index * 4); + final int limit = contents.getInt(this.offset + index * 4 + 4); + if (start <= limit && limit <= offset) { + Slice filter = contents.slice(start, limit - start); + return filterPolicy.keyMayMatch(key, filter); + } + else if (start == limit) { + // Empty filters do not match any keys + return false; + } + } + return true; // Errors are treated as potential matches + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/FilterPolicy.java b/leveldb/src/main/java/org/iq80/leveldb/table/FilterPolicy.java new file mode 100644 index 00000000..dd457f8e --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/table/FilterPolicy.java @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.iq80.leveldb.table; + +import org.iq80.leveldb.util.Slice; + +import java.util.List; + +/** + * A database can be configured with a custom FilterPolicy object. + * This object is responsible for creating a small filter from a set + * of keys. These filters are stored in leveldb and are consulted + * automatically by leveldb to decide whether or not to read some + * information from disk. In many cases, a filter can cut down the + * number of disk seeks from a handful to a single disk seek per + * DB::Get() call. + *
<p>
+ * Most people will want to use the builtin bloom filter support (see + * {@link BloomFilterPolicy}). + * + * @author Honore Vasconcelos + */ +public interface FilterPolicy extends org.iq80.leveldb.XFilterPolicy +{ + String name(); + + /** + * Create a filter that summarizes keys[0,n-1] and return it as a byte array. + * + * @param keys keys[0,n-1] contains a list of keys (potentially with duplicates) + * that are ordered according to the user supplied comparator. + */ + byte[] createFilter(List<Slice> keys); + + /** + * "filter" contains the data appended by a preceding call to + * CreateFilter() on this class. This method must return true if + * the key was in the list of keys passed to CreateFilter(). + * This method may return true or false if the key was not on the + * list, but it should aim to return false with a high probability. + */ + boolean keyMayMatch(Slice key, Slice filter); +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/KeyValueFunction.java b/leveldb/src/main/java/org/iq80/leveldb/table/KeyValueFunction.java new file mode 100644 index 00000000..ae9aa919 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/table/KeyValueFunction.java @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.iq80.leveldb.table; + +import org.iq80.leveldb.util.Slice; + +/** + * @author Honore Vasconcelos + */ +public interface KeyValueFunction<T> +{ + /** + * Function to apply on first entry after seeking in a table. + * + * @param internalKey internal key + * @param value associated value + * @return transformed key/value + */ + T apply(Slice internalKey, Slice value); +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/MMapTable.java b/leveldb/src/main/java/org/iq80/leveldb/table/MMapTable.java deleted file mode 100755 index 75b08a3b..00000000 --- a/leveldb/src/main/java/org/iq80/leveldb/table/MMapTable.java +++ /dev/null @@ -1,139 +0,0 @@ -/* - * Copyright (C) 2011 the original author or authors. - * See the notice.md file distributed with this work for additional - * information regarding copyright ownership. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
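For readers implementing the interface above, the following is a deliberately naive sketch of a custom policy (editor's example; the class name and filter scheme are invented, and production code should prefer the Bloom filter policy exercised by the tests later in this patch):

```java
import org.iq80.leveldb.table.FilterPolicy;
import org.iq80.leveldb.util.Slice;

import java.util.List;

// Toy policy: a 256-bit presence map over the first byte of each key.
public class FirstByteFilterPolicy implements FilterPolicy
{
    @Override
    public String name()
    {
        return "leveldb.FirstByteFilter";   // hypothetical name; stored as "filter.<name>" in the meta index
    }

    @Override
    public byte[] createFilter(List<Slice> keys)
    {
        byte[] filter = new byte[32];       // 256 bits
        for (Slice key : keys) {
            if (key.length() > 0) {
                int b = key.getByte(0) & 0xff;
                filter[b >>> 3] |= 1 << (b & 7);
            }
        }
        return filter;
    }

    @Override
    public boolean keyMayMatch(Slice key, Slice filter)
    {
        if (key.length() == 0 || filter.length() < 32) {
            return true;    // treat odd input as a potential match, mirroring FilterBlockReader
        }
        int b = key.getByte(0) & 0xff;
        return (filter.getByte(b >>> 3) & (1 << (b & 7))) != 0;
    }
}
```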
- */ -package org.iq80.leveldb.table; - -import org.iq80.leveldb.util.ByteBufferSupport; -import org.iq80.leveldb.util.Closeables; -import org.iq80.leveldb.util.Slice; -import org.iq80.leveldb.util.Slices; -import org.iq80.leveldb.util.Snappy; - -import java.io.Closeable; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.MappedByteBuffer; -import java.nio.channels.FileChannel; -import java.nio.channels.FileChannel.MapMode; -import java.util.Comparator; -import java.util.concurrent.Callable; - -import static com.google.common.base.Preconditions.checkArgument; -import static org.iq80.leveldb.CompressionType.SNAPPY; - -public class MMapTable - extends Table -{ - private MappedByteBuffer data; - - public MMapTable(String name, FileChannel fileChannel, Comparator comparator, boolean verifyChecksums) - throws IOException - { - super(name, fileChannel, comparator, verifyChecksums); - checkArgument(fileChannel.size() <= Integer.MAX_VALUE, "File must be smaller than %s bytes", Integer.MAX_VALUE); - } - - @Override - protected Footer init() - throws IOException - { - long size = fileChannel.size(); - data = fileChannel.map(MapMode.READ_ONLY, 0, size); - Slice footerSlice = Slices.copiedBuffer(data, (int) size - Footer.ENCODED_LENGTH, Footer.ENCODED_LENGTH); - return Footer.readFooter(footerSlice); - } - - @Override - public Callable closer() - { - return new Closer(name, fileChannel, data); - } - - private static class Closer - implements Callable - { - private final String name; - private final Closeable closeable; - private final MappedByteBuffer data; - - public Closer(String name, Closeable closeable, MappedByteBuffer data) - { - this.name = name; - this.closeable = closeable; - this.data = data; - } - - public Void call() - { - ByteBufferSupport.unmap(data); - Closeables.closeQuietly(closeable); - return null; - } - } - - @SuppressWarnings({"NonPrivateFieldAccessedInSynchronizedContext", "AssignmentToStaticFieldFromInstanceMethod"}) - @Override - protected Block readBlock(BlockHandle blockHandle) - throws IOException - { - // read block trailer - BlockTrailer blockTrailer = BlockTrailer.readBlockTrailer(Slices.copiedBuffer(this.data, - (int) blockHandle.getOffset() + blockHandle.getDataSize(), - BlockTrailer.ENCODED_LENGTH)); - -// todo re-enable crc check when ported to support direct buffers -// // only verify check sums if explicitly asked by the user -// if (verifyChecksums) { -// // checksum data and the compression type in the trailer -// PureJavaCrc32C checksum = new PureJavaCrc32C(); -// checksum.update(data.getRawArray(), data.getRawOffset(), blockHandle.getDataSize() + 1); -// int actualCrc32c = checksum.getMaskedValue(); -// -// checkState(blockTrailer.getCrc32c() == actualCrc32c, "Block corrupted: checksum mismatch"); -// } - - // decompress data - Slice uncompressedData; - ByteBuffer uncompressedBuffer = read(this.data, (int) blockHandle.getOffset(), blockHandle.getDataSize()); - if (blockTrailer.getCompressionType() == SNAPPY) { - synchronized (MMapTable.class) { - int uncompressedLength = uncompressedLength(uncompressedBuffer); - if (uncompressedScratch.capacity() < uncompressedLength) { - uncompressedScratch = ByteBuffer.allocateDirect(uncompressedLength); - } - uncompressedScratch.clear(); - - Snappy.uncompress(uncompressedBuffer, uncompressedScratch); - uncompressedData = Slices.copiedBuffer(uncompressedScratch); - } - } - else { - uncompressedData = Slices.copiedBuffer(uncompressedBuffer); - } - - return new 
Block(uncompressedData, comparator); - } - - public static ByteBuffer read(MappedByteBuffer data, int offset, int length) - throws IOException - { - int newPosition = data.position() + offset; - ByteBuffer block = (ByteBuffer) data.duplicate().order(ByteOrder.LITTLE_ENDIAN).clear().limit(newPosition + length).position(newPosition); - return block; - } -} diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/Table.java b/leveldb/src/main/java/org/iq80/leveldb/table/Table.java index 37ee951c..bff8307c 100755 --- a/leveldb/src/main/java/org/iq80/leveldb/table/Table.java +++ b/leveldb/src/main/java/org/iq80/leveldb/table/Table.java @@ -18,53 +18,102 @@ package org.iq80.leveldb.table; import com.google.common.base.Throwables; +import com.google.common.cache.CacheLoader; import org.iq80.leveldb.impl.SeekingIterable; -import org.iq80.leveldb.util.Closeables; +import org.iq80.leveldb.util.LRUCache; +import org.iq80.leveldb.util.RandomInputFile; import org.iq80.leveldb.util.Slice; +import org.iq80.leveldb.util.Slices; +import org.iq80.leveldb.util.Snappy; import org.iq80.leveldb.util.TableIterator; import org.iq80.leveldb.util.VariableLengthQuantity; -import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; +import java.nio.charset.Charset; import java.util.Comparator; import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; import static com.google.common.base.Preconditions.checkArgument; import static java.util.Objects.requireNonNull; +import static org.iq80.leveldb.CompressionType.SNAPPY; -public abstract class Table +public final class Table implements SeekingIterable { - protected final String name; - protected final FileChannel fileChannel; - protected final Comparator comparator; - protected final boolean verifyChecksums; - protected final Block indexBlock; - protected final BlockHandle metaindexBlockHandle; - - public Table(String name, FileChannel fileChannel, Comparator comparator, boolean verifyChecksums) + private static final Charset CHARSET = Charset.forName("UTF-8"); + private final Comparator comparator; + private final boolean verifyChecksums; + private final Block indexBlock; + private final BlockHandle metaindexBlockHandle; + private final RandomInputFile source; + private final LRUCache.LRUSubCache blockCache; + private final FilterBlockReader filter; + + public Table(RandomInputFile source, Comparator comparator, boolean verifyChecksum, LRUCache blockCache, final FilterPolicy filterPolicy) throws IOException { - requireNonNull(name, "name is null"); - requireNonNull(fileChannel, "fileChannel is null"); - long size = fileChannel.size(); + this.source = source; + this.blockCache = cacheForTable(blockCache); + requireNonNull(source, "source is null"); + long size = source.size(); checkArgument(size >= Footer.ENCODED_LENGTH, "File is corrupt: size must be at least %s bytes", Footer.ENCODED_LENGTH); requireNonNull(comparator, "comparator is null"); - this.name = name; - this.fileChannel = fileChannel; - this.verifyChecksums = verifyChecksums; + this.verifyChecksums = verifyChecksum; this.comparator = comparator; + final ByteBuffer footerData = source.read(size - Footer.ENCODED_LENGTH, Footer.ENCODED_LENGTH); - Footer footer = init(); - indexBlock = readBlock(footer.getIndexBlockHandle()); + Footer footer = Footer.readFooter(Slices.avoidCopiedBuffer(footerData)); + indexBlock = new Block(readRawBlock(footer.getIndexBlockHandle()), comparator); //no need for cache metaindexBlockHandle = 
footer.getMetaindexBlockHandle(); + this.filter = readMeta(filterPolicy); + } - protected abstract Footer init() - throws IOException; + private FilterBlockReader readMeta(FilterPolicy filterPolicy) throws IOException + { + if (filterPolicy == null) { + return null; // Do not need any metadata + } + + final Block meta = new Block(readRawBlock(metaindexBlockHandle), new BytewiseComparator()); + final BlockIterator iterator = meta.iterator(); + final Slice targetKey = new Slice(("filter." + filterPolicy.name()).getBytes(CHARSET)); + iterator.seek(targetKey); + if (iterator.hasNext() && iterator.peek().getKey().equals(targetKey)) { + return readFilter(filterPolicy, iterator.next().getValue()); + } + else { + return null; + } + } + + protected FilterBlockReader readFilter(FilterPolicy filterPolicy, Slice filterHandle) throws IOException + { + final Slice filterBlock = readRawBlock(BlockHandle.readBlockHandle(filterHandle.input())); + return new FilterBlockReader(filterPolicy, filterBlock); + } + + /** + * Get reference to a new sub cache to current table. + * + * @param blockCache global cache + * @return cache scoped to current table + */ + private LRUCache.LRUSubCache cacheForTable(LRUCache blockCache) + { + final LRUCache cache = requireNonNull(blockCache, "Block cache should not be null"); + return cache.subCache(new CacheLoader() + { + @Override + public Slice load(BlockHandle key) throws Exception + { + return readRawBlock(key); + } + }); + } @Override public TableIterator iterator() @@ -72,6 +121,11 @@ public TableIterator iterator() return new TableIterator(this, indexBlock.iterator()); } + public FilterBlockReader getFilter() + { + return filter; + } + public Block openBlock(Slice blockEntry) { BlockHandle blockHandle = BlockHandle.readBlockHandle(blockEntry.input()); @@ -85,16 +139,78 @@ public Block openBlock(Slice blockEntry) return dataBlock; } - protected static ByteBuffer uncompressedScratch = ByteBuffer.allocateDirect(4 * 1024 * 1024); - - protected abstract Block readBlock(BlockHandle blockHandle) - throws IOException; + private Block readBlock(BlockHandle blockHandle) + throws IOException + { + try { + final Slice rawBlock = blockCache.load(blockHandle); + return new Block(rawBlock, comparator); + } + catch (ExecutionException e) { + Throwables.propagateIfPossible(e.getCause(), IOException.class); + throw new IOException(e.getCause()); + } + } - protected int uncompressedLength(ByteBuffer data) + protected Slice readRawBlock(BlockHandle blockHandle) throws IOException { - int length = VariableLengthQuantity.readVariableLengthInt(data.duplicate()); - return length; + // read block trailer + final ByteBuffer trailerData = source.read(blockHandle.getOffset() + blockHandle.getDataSize(), BlockTrailer.ENCODED_LENGTH); + final BlockTrailer blockTrailer = BlockTrailer.readBlockTrailer(Slices.avoidCopiedBuffer(trailerData)); + +// todo re-enable crc check when ported to support direct buffers +// // only verify check sums if explicitly asked by the user +// if (verifyChecksums) { +// // checksum data and the compression type in the trailer +// PureJavaCrc32C checksum = new PureJavaCrc32C(); +// checksum.update(data.getRawArray(), data.getRawOffset(), blockHandle.getDataSize() + 1); +// int actualCrc32c = checksum.getMaskedValue(); +// +// checkState(blockTrailer.getCrc32c() == actualCrc32c, "Block corrupted: checksum mismatch"); +// } + + // decompress data + Slice uncompressedData; + ByteBuffer uncompressedBuffer = source.read(blockHandle.getOffset(), blockHandle.getDataSize()); + 
if (blockTrailer.getCompressionType() == SNAPPY) { + int uncompressedLength = uncompressedLength(uncompressedBuffer); + final ByteBuffer uncompressedScratch = ByteBuffer.allocateDirect(uncompressedLength); + Snappy.uncompress(uncompressedBuffer, uncompressedScratch); + uncompressedData = Slices.copiedBuffer(uncompressedScratch); + } + else { + uncompressedData = Slices.avoidCopiedBuffer(uncompressedBuffer); + } + + return uncompressedData; + } + + public <T> T internalGet(Slice key, KeyValueFunction<T> keyValueFunction) + { + final BlockIterator iterator = indexBlock.iterator(); + iterator.seek(key); + if (iterator.hasNext()) { + final BlockEntry peek = iterator.peek(); + final Slice handleValue = peek.getValue(); + if (filter != null && !filter.keyMayMatch(BlockHandle.readBlockHandle(handleValue.input()).getOffset(), key)) { + return null; + } + else { + final BlockIterator iterator1 = openBlock(handleValue).iterator(); + iterator1.seek(key); + if (iterator1.hasNext()) { + final BlockEntry next = iterator1.next(); + return keyValueFunction.apply(next.getKey(), next.getValue()); + } + } + } + return null; + } + + private int uncompressedLength(ByteBuffer data) + { + return VariableLengthQuantity.readVariableLengthInt(data.duplicate()); } /** @@ -123,34 +239,31 @@ public long getApproximateOffsetOf(Slice key) @Override public String toString() { - StringBuilder sb = new StringBuilder(); - sb.append("Table"); - sb.append("{name='").append(name).append('\''); - sb.append(", comparator=").append(comparator); - sb.append(", verifyChecksums=").append(verifyChecksums); - sb.append('}'); - return sb.toString(); + return "Table" + + "{source='" + source + '\'' + + ", comparator=" + comparator + + ", verifyChecksums=" + verifyChecksums + + '}'; } public Callable<?> closer() { - return new Closer(fileChannel); + return new CloseableToCallable(source); } - private static class Closer - implements Callable<Void> + private static class CloseableToCallable implements Callable<Object> { - private final Closeable closeable; + private RandomInputFile source; - public Closer(Closeable closeable) + public CloseableToCallable(RandomInputFile source) { - this.closeable = closeable; + this.source = source; } @Override - public Void call() + public Object call() throws Exception { - Closeables.closeQuietly(closeable); + source.close(); return null; } } diff --git a/leveldb/src/main/java/org/iq80/leveldb/table/TableBuilder.java b/leveldb/src/main/java/org/iq80/leveldb/table/TableBuilder.java index 9e541873..a6c100eb 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/table/TableBuilder.java +++ b/leveldb/src/main/java/org/iq80/leveldb/table/TableBuilder.java @@ -17,17 +17,16 @@ */ package org.iq80.leveldb.table; -import com.google.common.base.Throwables; import org.iq80.leveldb.CompressionType; import org.iq80.leveldb.Options; import org.iq80.leveldb.util.PureJavaCrc32C; import org.iq80.leveldb.util.Slice; import org.iq80.leveldb.util.Slices; import org.iq80.leveldb.util.Snappy; +import org.iq80.leveldb.util.WritableFile; import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; +import java.nio.charset.Charset; import static com.google.common.base.Preconditions.checkState; import static java.util.Objects.requireNonNull; @@ -41,14 +40,16 @@ public class TableBuilder * and taking the leading 64 bits. 
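The `internalGet()` method above is where the filter pays off: the index block is consulted first, and the data block is only read and decompressed when the filter cannot rule the key out. A minimal caller might look like this (editor's sketch, assuming the `Table` and `KeyValueFunction` types from this patch):

```java
import org.iq80.leveldb.table.KeyValueFunction;
import org.iq80.leveldb.table.Table;
import org.iq80.leveldb.util.Slice;

public class TableGetSketch
{
    static Slice get(Table table, Slice lookupKey)
    {
        // A null result means the key is absent from this table, including the case
        // where the Bloom filter ruled out the data block so it was never read at all.
        return table.internalGet(lookupKey, new KeyValueFunction<Slice>()
        {
            @Override
            public Slice apply(Slice internalKey, Slice value)
            {
                return value;   // materialize just the value
            }
        });
    }
}
```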
*/ public static final long TABLE_MAGIC_NUMBER = 0xdb4775248b80fb57L; + private static final Charset CHARSET = Charset.forName("UTF-8"); private final int blockRestartInterval; private final int blockSize; private final CompressionType compressionType; - private final FileChannel fileChannel; + private final WritableFile file; private final BlockBuilder dataBlockBuilder; private final BlockBuilder indexBlockBuilder; + private final FilterBlockBuilder filterPolicyBuilder; private Slice lastKey; private final UserComparator userComparator; @@ -71,18 +72,12 @@ public class TableBuilder private long position; - public TableBuilder(Options options, FileChannel fileChannel, UserComparator userComparator) + public TableBuilder(Options options, WritableFile file, UserComparator userComparator) { requireNonNull(options, "options is null"); - requireNonNull(fileChannel, "fileChannel is null"); - try { - checkState(position == fileChannel.position(), "Expected position %s to equal fileChannel.position %s", position, fileChannel.position()); - } - catch (IOException e) { - throw Throwables.propagate(e); - } + requireNonNull(file, "file is null"); - this.fileChannel = fileChannel; + this.file = file; this.userComparator = userComparator; blockRestartInterval = options.blockRestartInterval(); @@ -96,6 +91,14 @@ public TableBuilder(Options options, FileChannel fileChannel, UserComparator use indexBlockBuilder = new BlockBuilder(BlockHandle.MAX_ENCODED_LENGTH * expectedNumberOfBlocks, 1, userComparator); lastKey = Slices.EMPTY_SLICE; + + if (options.filterPolicy() != null) { + filterPolicyBuilder = new FilterBlockBuilder((FilterPolicy) options.filterPolicy()); + filterPolicyBuilder.startBlock(0); + } + else { + filterPolicyBuilder = null; + } } public long getEntryCount() @@ -106,7 +109,7 @@ public long getEntryCount() public long getFileSize() throws IOException { - return position + dataBlockBuilder.currentSizeEstimate(); + return position; } public void add(BlockEntry blockEntry) @@ -139,6 +142,10 @@ public void add(Slice key, Slice value) pendingIndexEntry = false; } + if (filterPolicyBuilder != null) { + filterPolicyBuilder.addKey(key); + } + lastKey = key; entryCount++; dataBlockBuilder.add(key, value); @@ -160,6 +167,11 @@ private void flush() checkState(!pendingIndexEntry, "Internal error: Table already has a pending index entry to flush"); pendingHandle = writeBlock(dataBlockBuilder); + + if (filterPolicyBuilder != null) { + filterPolicyBuilder.startBlock(position); + } + pendingIndexEntry = true; } @@ -168,7 +180,16 @@ private BlockHandle writeBlock(BlockBuilder blockBuilder) { // close the block Slice raw = blockBuilder.finish(); + BlockHandle blockHandle = writeRawBlock(raw); + // clean up state + blockBuilder.reset(); + + return blockHandle; + } + + private BlockHandle writeRawBlock(Slice raw) throws IOException + { // attempt to compress the block Slice blockContents = raw; CompressionType blockCompressionType = CompressionType.NONE; @@ -196,11 +217,9 @@ private BlockHandle writeBlock(BlockBuilder blockBuilder) BlockHandle blockHandle = new BlockHandle(position, blockContents.length()); // write data and trailer - position += fileChannel.write(new ByteBuffer[] {blockContents.toByteBuffer(), trailer.toByteBuffer()}); - - // clean up state - blockBuilder.reset(); - + file.append(blockContents); + file.append(trailer); + position += blockContents.length() + trailer.length(); return blockHandle; } @@ -240,9 +259,19 @@ public void finish() // mark table as closed closed = true; + BlockHandle 
filterBlockHandle = null; + + if (filterPolicyBuilder != null) { + filterBlockHandle = writeRawBlock(filterPolicyBuilder.finish()); + } + // write (empty) meta index block BlockBuilder metaIndexBlockBuilder = new BlockBuilder(256, blockRestartInterval, new BytewiseComparator()); - // TODO(postrelease): Add stats and other meta blocks + + if (filterBlockHandle != null) { + metaIndexBlockBuilder.add(new Slice(("filter." + filterPolicyBuilder.name()).getBytes(CHARSET)), BlockHandle.writeBlockHandle(filterBlockHandle)); + } + BlockHandle metaindexBlockHandle = writeBlock(metaIndexBlockBuilder); // add last handle to index block @@ -260,7 +289,8 @@ public void finish() // write footer Footer footer = new Footer(metaindexBlockHandle, indexBlockHandle); Slice footerEncoding = Footer.writeFooter(footer); - position += fileChannel.write(footerEncoding.toByteBuffer()); + file.append(footerEncoding); + position += footerEncoding.length(); } public void abandon() diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/AbstractSeekingIterator.java b/leveldb/src/main/java/org/iq80/leveldb/util/AbstractSeekingIterator.java index 5e20ac84..5588ada8 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/util/AbstractSeekingIterator.java +++ b/leveldb/src/main/java/org/iq80/leveldb/util/AbstractSeekingIterator.java @@ -53,11 +53,8 @@ public final boolean hasNext() @Override public final Entry next() { - if (nextElement == null) { - nextElement = getNextElement(); - if (nextElement == null) { - throw new NoSuchElementException(); - } + if (!hasNext()) { + throw new NoSuchElementException(); } Entry result = nextElement; @@ -68,11 +65,8 @@ public final Entry next() @Override public final Entry peek() { - if (nextElement == null) { - nextElement = getNextElement(); - if (nextElement == null) { - throw new NoSuchElementException(); - } + if (!hasNext()) { + throw new NoSuchElementException(); } return nextElement; diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/DbIterator.java b/leveldb/src/main/java/org/iq80/leveldb/util/DbIterator.java index dfb249b1..70a2a52e 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/util/DbIterator.java +++ b/leveldb/src/main/java/org/iq80/leveldb/util/DbIterator.java @@ -21,6 +21,7 @@ import org.iq80.leveldb.impl.MemTable.MemTableIterator; import org.iq80.leveldb.impl.SeekingIterator; +import java.io.Closeable; import java.util.Comparator; import java.util.Iterator; import java.util.List; @@ -31,7 +32,7 @@ public final class DbIterator extends AbstractSeekingIterator - implements InternalIterator + implements InternalIterator, Closeable { /* * NOTE: This code has been specifically tuned for performance of the DB @@ -51,27 +52,26 @@ public final class DbIterator private final MemTableIterator memTableIterator; private final MemTableIterator immutableMemTableIterator; - private final List level0Files; - private final List levels; + private final List levels; private final Comparator comparator; + private final Runnable cleanup; private final ComparableIterator[] heap; private int heapSize; public DbIterator(MemTableIterator memTableIterator, - MemTableIterator immutableMemTableIterator, - List level0Files, - List levels, - Comparator comparator) + MemTableIterator immutableMemTableIterator, + List levels, + Comparator comparator, Runnable cleanup) { this.memTableIterator = memTableIterator; this.immutableMemTableIterator = immutableMemTableIterator; - this.level0Files = level0Files; this.levels = levels; this.comparator = comparator; + this.cleanup = cleanup; - this.heap = 
new ComparableIterator[3 + level0Files.size() + levels.size()]; + this.heap = new ComparableIterator[3 + levels.size()]; resetPriorityQueue(); } @@ -84,10 +84,7 @@ protected void seekToFirstInternal() if (immutableMemTableIterator != null) { immutableMemTableIterator.seekToFirst(); } - for (InternalTableIterator level0File : level0Files) { - level0File.seekToFirst(); - } - for (LevelIterator level : levels) { + for (InternalIterator level : levels) { level.seekToFirst(); } resetPriorityQueue(); @@ -102,10 +99,7 @@ protected void seekInternal(InternalKey targetKey) if (immutableMemTableIterator != null) { immutableMemTableIterator.seek(targetKey); } - for (InternalTableIterator level0File : level0Files) { - level0File.seek(targetKey); - } - for (LevelIterator level : levels) { + for (InternalIterator level : levels) { level.seek(targetKey); } resetPriorityQueue(); @@ -151,12 +145,7 @@ private void resetPriorityQueue() if (immutableMemTableIterator != null && immutableMemTableIterator.hasNext()) { heapAdd(new ComparableIterator(immutableMemTableIterator, comparator, i++, immutableMemTableIterator.next())); } - for (InternalTableIterator level0File : level0Files) { - if (level0File.hasNext()) { - heapAdd(new ComparableIterator(level0File, comparator, i++, level0File.next())); - } - } - for (LevelIterator level : levels) { + for (InternalIterator level : levels) { if (level.hasNext()) { heapAdd(new ComparableIterator(level, comparator, i++, level.next())); } @@ -213,13 +202,18 @@ public String toString() sb.append("DbIterator"); sb.append("{memTableIterator=").append(memTableIterator); sb.append(", immutableMemTableIterator=").append(immutableMemTableIterator); - sb.append(", level0Files=").append(level0Files); sb.append(", levels=").append(levels); sb.append(", comparator=").append(comparator); sb.append('}'); return sb.toString(); } + @Override + public void close() + { + cleanup.run(); + } + private static class ComparableIterator implements Iterator<Entry<InternalKey, Slice>>, Comparable<ComparableIterator> { diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/Hash.java b/leveldb/src/main/java/org/iq80/leveldb/util/Hash.java new file mode 100644 index 00000000..8d11045f --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/Hash.java @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.iq80.leveldb.util; + +/** + * @author Honore Vasconcelos + */ +public final class Hash +{ + private Hash() + { + } + + public static int hash(byte[] data, int seed) + { + return hash(data, 0, data.length, seed); + } + + /** + * Partial array hash that starts at offset and covers len bytes. 
+ * + * @param data full data + * @param offset data start offset + * @param len length of the data to hash + * @param seed hash seed + * @return hash value (the sign carries no meaning) + */ + public static int hash(byte[] data, int offset, int len, int seed) + { + final int endIdx = len + offset; + // Similar to murmur hash + int m = 0xc6a4a793; + int r = 24; + + int h = seed ^ (len * m); + + int idx = offset; + // Pick up four bytes at a time + for (; idx + 4 <= endIdx; idx += 4) { + int w = byteToInt(data, idx); + h += w; + h *= m; + h ^= (h >>> 16); + } + + // Pick up remaining bytes + final int remaining = endIdx - idx; + switch (remaining) { + case 3: + h += (data[idx + 2] & 0xff) << 16; + //FALLTHROUGH INTENDED: DO NOT PUT BREAK + case 2: + h += (data[idx + 1] & 0xff) << 8; + //FALLTHROUGH INTENDED: DO NOT PUT BREAK + case 1: + h += data[idx] & 0xff; + h *= m; + h ^= (h >>> r); + break; + } + return h; + } + + private static int byteToInt(byte[] data, final int index) + { + return (data[index] & 0xff) | + (data[index + 1] & 0xff) << 8 | + (data[index + 2] & 0xff) << 16 | + (data[index + 3] & 0xff) << 24; + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/LRUCache.java b/leveldb/src/main/java/org/iq80/leveldb/util/LRUCache.java new file mode 100644 index 00000000..6b799456 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/LRUCache.java @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.iq80.leveldb.util; + +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.google.common.cache.Weigher; + +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Single LRU cache that can be used by multiple clients; each client can use identical keys without interfering with + * other clients. + *
<p>
+ * To obtain a new cache client instance simply use {@link LRUCache#subCache(CacheLoader)}. + *
<p>
+ * Tries to reduce the number of objects created by having only one loader per client. + * + * @author Honore Vasconcelos + */ +public final class LRUCache<K, V> +{ + private final AtomicLong idGenerator = new AtomicLong(); + private final LoadingCache<CacheKey<K, V>, V> cache; + private final CacheLoader<CacheKey<K, V>, V> valueLoader = new CacheLoader<CacheKey<K, V>, V>() + { + @Override + public V load(CacheKey<K, V> key) throws Exception + { + return key.loader.load(key.key); + } + }; + + public LRUCache(int capacity, final Weigher<K, V> weigher) + { + this.cache = CacheBuilder.<CacheKey<K, V>, V>newBuilder() + .maximumWeight(capacity) + .weigher(new CacheKeyWeigher<>(weigher)) + .concurrencyLevel(1 << 4) + .build(valueLoader); + } + + public LRUSubCache<K, V> subCache(final CacheLoader<K, V> loader) + { + return new LRUSubCache<K, V>() + { + private final long id = idGenerator.incrementAndGet(); + + @Override + public V load(K k) throws ExecutionException + { + return cache.get(new CacheKey<>(id, k, loader)); + } + }; + } + + public interface LRUSubCache<K, V> + { + V load(K key) throws ExecutionException; + } + + public static final class CacheKey<K, V> + { + private final long id; + private final K key; + //not part of the key, but avoid creating objects for each key search + private final CacheLoader<K, V> loader; + + CacheKey(final long id, K key, CacheLoader<K, V> loader) + { + this.id = id; + this.key = key; + this.loader = loader; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + CacheKey<?, ?> cacheKey = (CacheKey<?, ?>) o; + + return id == cacheKey.id && key.equals(cacheKey.key); + } + + @Override + public int hashCode() + { + int result = (int) (id ^ (id >>> 32)); + result = 31 * result + key.hashCode(); + return result; + } + } + + private static final class CacheKeyWeigher<K, V> implements Weigher<CacheKey<K, V>, V> + { + private final Weigher<K, V> weigher; + + CacheKeyWeigher(Weigher<K, V> weigher) + { + this.weigher = weigher; + } + + @Override + public int weigh(CacheKey<K, V> key, V value) + { + return 32 + weigher.weigh(key.key, value); + } + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/MMRandomInputFile.java b/leveldb/src/main/java/org/iq80/leveldb/util/MMRandomInputFile.java new file mode 100644 index 00000000..e948589e --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/MMRandomInputFile.java @@ -0,0 +1,94 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import com.google.common.io.Files; + +import java.io.File; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.MappedByteBuffer; + +import static java.util.Objects.requireNonNull; + +/** + * Memory mapped file table. 
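A sketch of how two tables can share one capacity-bounded cache through `subCache` (editor's example; the generic signatures follow the reconstruction above and `loaderForTableA`/`loaderForTableB` stand for per-table loaders):

```java
import com.google.common.cache.CacheLoader;
import com.google.common.cache.Weigher;
import org.iq80.leveldb.util.LRUCache;
import org.iq80.leveldb.util.Slice;

import java.util.concurrent.ExecutionException;

public class SharedCacheSketch
{
    static void demo(CacheLoader<Long, Slice> loaderForTableA, CacheLoader<Long, Slice> loaderForTableB)
            throws ExecutionException
    {
        // One global budget (~8MB here), weighted by the size of each cached block.
        LRUCache<Long, Slice> blockCache = new LRUCache<>(8 << 20, new Weigher<Long, Slice>()
        {
            @Override
            public int weigh(Long offset, Slice block)
            {
                return block.length();
            }
        });
        LRUCache.LRUSubCache<Long, Slice> tableA = blockCache.subCache(loaderForTableA);
        LRUCache.LRUSubCache<Long, Slice> tableB = blockCache.subCache(loaderForTableB);
        Slice a = tableA.load(0L);   // loaded through loaderForTableA
        Slice b = tableB.load(0L);   // same key value, different sub-cache: a distinct entry
        System.out.println(a.length() + " " + b.length());
    }
}
```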
+ * + * @author Honore Vasconcelos + */ +public class MMRandomInputFile implements RandomInputFile +{ + private final String file; + private final long size; + private final MappedByteBuffer data; + + private MMRandomInputFile(String file, MappedByteBuffer data, long size) + { + this.file = file; + this.size = size; + this.data = data; + } + + /** + * Open file using memory mapped file access. + * @param file file to open + * @return readable file + * @throws IOException If some other I/O error occurs + */ + public static RandomInputFile open(File file) throws IOException + { + requireNonNull(file, "file is null"); + MappedByteBuffer map = Files.map(file); + + return new MMRandomInputFile(file.getAbsolutePath(), map, map.capacity()); + } + + @Override + public long size() + { + return size; + } + + @Override + public ByteBuffer read(long offset, int length) + { + int newPosition = (int) (data.position() + offset); + return (ByteBuffer) data.duplicate().order(ByteOrder.LITTLE_ENDIAN).clear().limit(newPosition + length).position(newPosition); + } + + @Override + public void close() throws IOException + { + try { + ByteBufferSupport.unmap(data); + } + catch (Exception e) { + throw new IOException("Unable to unmap file", e); + } + } + + @Override + public String toString() + { + return "MMTableDataSource{" + + "file='" + file + '\'' + + ", size=" + size + + '}'; + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/MMWritableFile.java b/leveldb/src/main/java/org/iq80/leveldb/util/MMWritableFile.java new file mode 100644 index 00000000..fdbffa5d --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/MMWritableFile.java @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import com.google.common.io.Files; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; + +/** + * Memory mapped file implementation of {@link WritableFile}. 
+ */ +public class MMWritableFile implements WritableFile +{ + private final File file; + private final int pageSize; + private MappedByteBuffer mappedByteBuffer; + private int fileOffset; + + private MMWritableFile(File file, int pageSize, MappedByteBuffer map) + { + this.file = file; + this.pageSize = pageSize; + this.fileOffset = 0; + this.mappedByteBuffer = map; + } + + public static WritableFile open(File file, int pageSize) throws IOException + { + return new MMWritableFile(file, pageSize, Files.map(file, FileChannel.MapMode.READ_WRITE, pageSize)); + } + + @Override + public void append(Slice data) throws IOException + { + ensureCapacity(data.length()); + data.getBytes(0, mappedByteBuffer); + } + + private void destroyMappedByteBuffer() + { + if (mappedByteBuffer != null) { + fileOffset += mappedByteBuffer.position(); + unmap(); + } + mappedByteBuffer = null; + } + + private void ensureCapacity(int bytes) + throws IOException + { + if (mappedByteBuffer == null) { + mappedByteBuffer = openNewMap(fileOffset, Math.max(bytes, pageSize)); + } + if (mappedByteBuffer.remaining() < bytes) { + // remap + fileOffset += mappedByteBuffer.position(); + unmap(); + int sizeToGrow = Math.max(bytes, pageSize); + mappedByteBuffer = openNewMap(fileOffset, sizeToGrow); + } + } + + private MappedByteBuffer openNewMap(int fileOffset, int sizeToGrow) throws IOException + { + FileChannel cha = null; + try { + cha = openChannel(); + return cha.map(FileChannel.MapMode.READ_WRITE, fileOffset, sizeToGrow); + } + finally { + Closeables.closeQuietly(cha); + } + } + + private FileChannel openChannel() throws FileNotFoundException + { + return new java.io.RandomAccessFile(file, "rw").getChannel(); + } + + private void unmap() + { + ByteBufferSupport.unmap(mappedByteBuffer); + } + + @Override + public void force() throws IOException + { + } + + @Override + public void close() throws IOException + { + destroyMappedByteBuffer(); + FileChannel cha = null; + try { + cha = openChannel(); + cha.truncate(fileOffset); + } + finally { + Closeables.closeQuietly(cha); + } + } + + @Override + public String toString() + { + return "MMWritableFile{" + + "file=" + file + + '}'; + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/RandomInputFile.java b/leveldb/src/main/java/org/iq80/leveldb/util/RandomInputFile.java new file mode 100644 index 00000000..06ad7f9b --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/RandomInputFile.java @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * Read only data source for table data/blocks. 
+ * + * @author Honore Vasconcelos + */ +public interface RandomInputFile extends Closeable +{ + /** + * Source size + */ + long size(); + + /** + * Read {@code length} bytes from {@code source} starting at position {@code offset}. + * @param offset position for read start + * @param length length of the bytes to read + * @return read only view of the data. + * @throws IOException on any exception while accessing the source media + */ + ByteBuffer read(long offset, int length) throws IOException; +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/SequentialFile.java b/leveldb/src/main/java/org/iq80/leveldb/util/SequentialFile.java new file mode 100644 index 00000000..a1fb0063 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/SequentialFile.java @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import java.io.Closeable; +import java.io.IOException; + +public interface SequentialFile extends Closeable +{ + /** + * Skips over and discards n bytes of data from the + * input stream. + * + * @param n the number of bytes to be skipped. + * @throws IOException if n is negative, if the stream does not + * support seek, or if an I/O error occurs. + */ + void skip(long n) throws IOException; + + /** + * Read up to "atMost" bytes from the file. + * + * @param atMost the maximum number of bytes to read. + * @param destination data destination + * @return the total number of bytes read into the destination, or + * -1 if there is no more data because the end of + * the stream has been reached. + * @throws IOException If the first byte cannot be read for any reason + * other than end of file, or if the input stream has been closed, or if + * some other I/O error occurs. + */ + int read(int atMost, SliceOutput destination) throws IOException; +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/SequentialFileImpl.java b/leveldb/src/main/java/org/iq80/leveldb/util/SequentialFileImpl.java new file mode 100644 index 00000000..801155be --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/SequentialFileImpl.java @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; + +import static com.google.common.base.Preconditions.checkState; + +public class SequentialFileImpl implements SequentialFile +{ + private final FileInputStream inputStream; + + private SequentialFileImpl(FileInputStream inputStream) + { + this.inputStream = inputStream; + } + + public static SequentialFile open(File file) throws IOException + { + return new SequentialFileImpl(new FileInputStream(file)); + } + + @Override + public void skip(long n) throws IOException + { + checkState(n >= 0, "n must be non-negative"); + if (inputStream.skip(n) != n) { + throw new IOException(inputStream + " has not enough bytes to skip"); + } + } + + @Override + public int read(int atMost, SliceOutput destination) throws IOException + { + return destination.writeBytes(inputStream, atMost); + } + + @Override + public void close() throws IOException + { + inputStream.close(); + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/Slices.java b/leveldb/src/main/java/org/iq80/leveldb/util/Slices.java index f11ee8e0..1da60419 100644 --- a/leveldb/src/main/java/org/iq80/leveldb/util/Slices.java +++ b/leveldb/src/main/java/org/iq80/leveldb/util/Slices.java @@ -112,6 +112,16 @@ public static Slice copiedBuffer(ByteBuffer source) return copy; } + public static Slice avoidCopiedBuffer(ByteBuffer source) + { + if (source.hasArray()) { + return new Slice(source.array(), source.arrayOffset(), source.remaining()); + } + else { + return copiedBuffer(source); + } + } + public static Slice copiedBuffer(String string, Charset charset) { requireNonNull(string, "string is null"); diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/UnbufferedRandomInputFile.java b/leveldb/src/main/java/org/iq80/leveldb/util/UnbufferedRandomInputFile.java new file mode 100644 index 00000000..f7996644 --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/UnbufferedRandomInputFile.java @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
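The new `Slices.avoidCopiedBuffer` helper above only avoids the copy when the buffer is heap-backed; a small self-contained demonstration (editor's example):

```java
import java.nio.ByteBuffer;

import org.iq80.leveldb.util.Slice;
import org.iq80.leveldb.util.Slices;

public class AvoidCopySketch
{
    public static void main(String[] args)
    {
        ByteBuffer heap = ByteBuffer.wrap(new byte[] {1, 2, 3});   // hasArray() == true
        ByteBuffer direct = ByteBuffer.allocateDirect(3);          // hasArray() == false
        Slice wrapped = Slices.avoidCopiedBuffer(heap);            // shares the backing array
        Slice copied = Slices.avoidCopiedBuffer(direct);           // falls back to copiedBuffer()
        System.out.println(wrapped.length() + " " + copied.length());
    }
}
```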
+ */ +package org.iq80.leveldb.util; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.channels.FileChannel; + +import static java.util.Objects.requireNonNull; + +/** + * @author Honore Vasconcelos + */ +public class UnbufferedRandomInputFile implements RandomInputFile +{ + private final String file; + private final FileChannel fileChannel; + private final long size; + + private UnbufferedRandomInputFile(String file, FileChannel fileChannel, long size) + { + this.file = file; + this.fileChannel = fileChannel; + this.size = size; + } + + public static RandomInputFile open(File file) throws IOException + { + requireNonNull(file, "file is null"); + FileChannel channel = new FileInputStream(file).getChannel(); + return new UnbufferedRandomInputFile(file.getAbsolutePath(), channel, channel.size()); + } + + @Override + public long size() + { + return size; + } + + @Override + public ByteBuffer read(long offset, int length) throws IOException + { + ByteBuffer uncompressedBuffer = ByteBuffer.allocate(length).order(ByteOrder.LITTLE_ENDIAN); + fileChannel.read(uncompressedBuffer, offset); + if (uncompressedBuffer.hasRemaining()) { + throw new IOException("Could not read all the data"); + } + uncompressedBuffer.clear(); + return uncompressedBuffer; + } + + @Override + public void close() throws IOException + { + fileChannel.close(); + } + + @Override + public String toString() + { + return "FileTableDataSource{" + + "file='" + file + '\'' + + ", size=" + size + + '}'; + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/UnbufferedWritableFile.java b/leveldb/src/main/java/org/iq80/leveldb/util/UnbufferedWritableFile.java new file mode 100644 index 00000000..7447a29d --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/UnbufferedWritableFile.java @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
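Since both sources implement the same `RandomInputFile` contract, callers can pick the memory-mapped or plain-channel variant without the table code noticing, which is what the mmap-related options toggle. An editor's sketch (the file name is hypothetical, and the 48-byte read assumes the standard leveldb footer size):

```java
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;

import org.iq80.leveldb.util.MMRandomInputFile;
import org.iq80.leveldb.util.RandomInputFile;
import org.iq80.leveldb.util.UnbufferedRandomInputFile;

public class TableSourceSketch
{
    static RandomInputFile openSource(File tableFile, boolean allowMmapReads) throws IOException
    {
        // Both implementations honor the same read(offset, length) contract.
        return allowMmapReads
                ? MMRandomInputFile.open(tableFile)
                : UnbufferedRandomInputFile.open(tableFile);
    }

    public static void main(String[] args) throws IOException
    {
        RandomInputFile source = openSource(new File("000123.sst"), true);  // hypothetical file name
        ByteBuffer footer = source.read(source.size() - 48, 48);            // 48 is assumed to be Footer.ENCODED_LENGTH
        System.out.println(footer.remaining());
        source.close();
    }
}
```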
+ */ +package org.iq80.leveldb.util; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.channels.FileChannel; + +/** + * @author Honore Vasconcelos + */ +public class UnbufferedWritableFile implements WritableFile +{ + private final File file; + private final FileChannel channel; + + private UnbufferedWritableFile(File file, FileChannel channel) + { + this.file = file; + this.channel = channel; + } + + public static WritableFile open(File file) throws FileNotFoundException + { + return new UnbufferedWritableFile(file, new FileOutputStream(file).getChannel()); + } + + @Override + public void append(Slice data) throws IOException + { + channel.write(data.toByteBuffer()); + } + + @Override + public void force() throws IOException + { + channel.force(false); + } + + @Override + public void close() throws IOException + { + channel.close(); + } + + @Override + public String toString() + { + return "UnbufferedWritableFile{" + + "file=" + file + + '}'; + } +} diff --git a/leveldb/src/main/java/org/iq80/leveldb/util/WritableFile.java b/leveldb/src/main/java/org/iq80/leveldb/util/WritableFile.java new file mode 100644 index 00000000..66b649ab --- /dev/null +++ b/leveldb/src/main/java/org/iq80/leveldb/util/WritableFile.java @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import java.io.Closeable; +import java.io.IOException; + +/** + * A file abstraction for sequential writing. The implementation + * must provide buffering since callers may append small fragments + * at a time to the file. + * + * @author Honore Vasconcelos + */ +public interface WritableFile extends Closeable +{ + /** + * Append {@code data} to current file position. + * @param data data to append + * @throws IOException on any error accessing file + */ + void append(Slice data) throws IOException; + + /** + * Force sync bytes to filesystem. 
+ * @throws IOException on any error accessing file + */ + void force() throws IOException; +} diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/ApiTest.java b/leveldb/src/test/java/org/iq80/leveldb/impl/ApiTest.java index f16235ce..69d9e93b 100755 --- a/leveldb/src/test/java/org/iq80/leveldb/impl/ApiTest.java +++ b/leveldb/src/test/java/org/iq80/leveldb/impl/ApiTest.java @@ -23,6 +23,8 @@ import org.iq80.leveldb.DBFactory; import org.iq80.leveldb.Options; import org.iq80.leveldb.util.FileUtils; +import org.testng.annotations.AfterMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; @@ -58,12 +60,21 @@ File getTestDirectory(String name) return rc; } - @Test - public void testCompaction() - throws IOException, DBException + @DataProvider(name = "options") + public Object[][] optionsProvider() { - Options options = new Options().createIfMissing(true).compressionType(CompressionType.NONE); + return new Object[][] { + {"mm/mm", new Options().createIfMissing(true).allowMmapWrites(true).allowMmapReads(true).compressionType(CompressionType.NONE)}, + {"mm/raf", new Options().createIfMissing(true).allowMmapWrites(true).allowMmapReads(false).compressionType(CompressionType.NONE)}, + {"raf/mm", new Options().createIfMissing(true).allowMmapWrites(false).allowMmapReads(true).compressionType(CompressionType.NONE)}, + {"raf/raf", new Options().createIfMissing(true).allowMmapWrites(false).allowMmapReads(false).compressionType(CompressionType.NONE)}, + }; + } + @Test(invocationCount = 2, dataProvider = "options") + public void testCompaction(String op, Options options) + throws IOException, DBException + { File path = getTestDirectory("testCompaction"); DB db = factory.open(path, options); @@ -99,4 +110,10 @@ public void testCompaction() db.close(); } + + @AfterMethod + public void tearDown() + { + FileUtils.deleteRecursively(databaseDir); + } } diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/DbImplTest.java b/leveldb/src/test/java/org/iq80/leveldb/impl/DbImplTest.java index cc833cbe..2f7bb41a 100644 --- a/leveldb/src/test/java/org/iq80/leveldb/impl/DbImplTest.java +++ b/leveldb/src/test/java/org/iq80/leveldb/impl/DbImplTest.java @@ -17,9 +17,11 @@ */ package org.iq80.leveldb.impl; +import com.google.common.base.Strings; import com.google.common.collect.ImmutableList; -import com.google.common.primitives.Ints; +import com.google.common.collect.Iterators; import com.google.common.primitives.UnsignedBytes; +import org.iq80.leveldb.CompressionType; import org.iq80.leveldb.DB; import org.iq80.leveldb.DBComparator; import org.iq80.leveldb.DBIterator; @@ -29,15 +31,20 @@ import org.iq80.leveldb.Snapshot; import org.iq80.leveldb.WriteBatch; import org.iq80.leveldb.WriteOptions; +import org.iq80.leveldb.table.BloomFilterPolicy; +import org.iq80.leveldb.util.DbIterator; import org.iq80.leveldb.util.FileUtils; import org.iq80.leveldb.util.Slice; import org.iq80.leveldb.util.Slices; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.Closeable; import java.io.File; import java.io.IOException; +import java.math.BigInteger; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -45,6 +52,9 @@ import java.util.Map.Entry; import java.util.NoSuchElementException; import java.util.Random; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import 
java.util.concurrent.Executors;
 
 import static com.google.common.collect.Maps.immutableEntry;
 import static java.nio.charset.StandardCharsets.UTF_8;
@@ -70,14 +80,23 @@ public class DbImplTest
 
     private File databaseDir;
 
-    @Test
-    public void testBackgroundCompaction()
+    @DataProvider(name = "options")
+    public Object[][] optionsProvider()
+    {
+        return new Object[][] {
+                {"No Compression", new Options().compressionType(CompressionType.NONE)},
+                {"Bloom Filter", new Options().filterPolicy(new BloomFilterPolicy(10))},
+                {"Snappy", new Options().compressionType(CompressionType.SNAPPY)}
+        };
+    }
+
+    @Test(dataProvider = "options")
+    public void testBackgroundCompaction(final String desc, final Options options)
             throws Exception
     {
-        Options options = new Options();
         options.maxOpenFiles(100);
         options.createIfMissing(true);
-        DbImpl db = new DbImpl(options, this.databaseDir);
+        DbImpl db = new DbImpl(options, this.databaseDir, new EnvImpl());
         Random random = new Random(301);
         for (int i = 0; i < 200000 * STRESS_FACTOR; i++) {
             db.put(randomString(random, 64).getBytes(), new byte[] {0x01}, new WriteOptions().sync(false));
@@ -89,12 +108,68 @@ public void testBackgroundCompaction()
     }
 
     @Test
-    public void testCompactionsOnBigDataSet()
-            throws Exception
+    public void testConcurrentWrite() throws Exception
     {
         Options options = new Options();
+        options.maxOpenFiles(50);
+        options.createIfMissing(true);
+        final DbImpl db = new DbImpl(options, this.databaseDir, new EnvImpl());
+        ExecutorService ex = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() * 4);
+        try {
+            final int numEntries = 1000000;
+            final int growValueBy = 10;
+            final CountDownLatch segmentsToPutEnd = new CountDownLatch(numEntries / 100);
+            final Random random = new Random(Thread.currentThread().getId());
+            final int segmentSize = 100;
+            // dispatch writes
+            for (int i = 0; i < numEntries; i += segmentSize) {
+                final int finalI = i;
+                ex.submit(new Runnable()
+                {
+                    @Override
+                    public void run()
+                    {
+                        final int i2 = finalI + segmentSize;
+                        for (int j = finalI; j < i2; j++) {
+                            final BigInteger bigInteger = BigInteger.valueOf(j);
+                            final byte[] value = bigInteger.toByteArray();
+                            final byte[] bytes = new byte[growValueBy + value.length];
+                            for (int k = 0; k < growValueBy; k += value.length) {
+                                System.arraycopy(value, 0, bytes, k, value.length);
+                            }
+                            db.put(value, bytes);
+                            if (random.nextInt(100) < 2) {
+                                Thread.yield();
+                            }
+                        }
+                        segmentsToPutEnd.countDown();
+                    }
+                });
+            }
+            segmentsToPutEnd.await();
+            // check that all writes have been applied
+            for (int i = 0; i < numEntries; i++) {
+                final BigInteger bigInteger = BigInteger.valueOf(i);
+                final byte[] value = bigInteger.toByteArray();
+                final byte[] bytes = new byte[growValueBy + value.length];
+                for (int k = 0; k < growValueBy; k += value.length) {
+                    System.arraycopy(value, 0, bytes, k, value.length);
+                }
+                assertEquals(db.get(value), bytes);
+            }
+        }
+        finally {
+            db.close();
+            ex.shutdownNow();
+        }
+    }
+
+    @Test(dataProvider = "options")
+    public void testCompactionsOnBigDataSet(final String desc, final Options options)
+            throws Exception
+    {
         options.createIfMissing(true);
-        DbImpl db = new DbImpl(options, databaseDir);
+        DbImpl db = new DbImpl(options, databaseDir, new EnvImpl());
         for (int index = 0; index < 5000000; index++) {
             String key = "Key LOOOOOOOOOOOOOOOOOONG KEY " + index;
             String value = "This is element " + index + "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABZASDFASDKLFJASDFKJSDFLKSDJFLKJSDHFLKJHSDJFSDFHJASDFLKJSDF";
@@ -102,22 +177,21 @@ public void 
testCompactionsOnBigDataSet() } } - @Test - public void testEmpty() + @Test(dataProvider = "options") + public void testEmpty(final String desc, final Options options) throws Exception { - Options options = new Options(); File databaseDir = this.databaseDir; DbStringWrapper db = new DbStringWrapper(options, databaseDir); assertNull(db.get("foo")); } - @Test - public void testEmptyBatch() + @Test(dataProvider = "options") + public void testEmptyBatch(final String desc, final Options options) throws Exception { // open new db - Options options = new Options().createIfMissing(true); + options.createIfMissing(true); DB db = new Iq80DBFactory().open(databaseDir, options); // write an empty batch @@ -132,11 +206,11 @@ public void testEmptyBatch() new Iq80DBFactory().open(databaseDir, options); } - @Test - public void testReadWrite() + @Test(dataProvider = "options") + public void testReadWrite(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "v1"); assertEquals(db.get("foo"), "v1"); db.put("bar", "v2"); @@ -145,11 +219,11 @@ public void testReadWrite() assertEquals(db.get("bar"), "v2"); } - @Test - public void testPutDeleteGet() + @Test(dataProvider = "options") + public void testPutDeleteGet(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "v1"); assertEquals(db.get("foo"), "v1"); db.put("foo", "v2"); @@ -158,12 +232,12 @@ public void testPutDeleteGet() assertNull(db.get("foo")); } - @Test - public void testGetFromImmutableLayer() + @Test(dataProvider = "options") + public void testGetFromImmutableLayer(final String desc, final Options options) throws Exception { // create db with small write buffer - DbStringWrapper db = new DbStringWrapper(new Options().writeBufferSize(100000), databaseDir); + DbStringWrapper db = new DbStringWrapper(options.writeBufferSize(100000), databaseDir); db.put("foo", "v1"); assertEquals(db.get("foo"), "v1"); @@ -178,21 +252,21 @@ public void testGetFromImmutableLayer() // todo Release sync calls } - @Test - public void testGetFromVersions() + @Test(dataProvider = "options") + public void testGetFromVersions(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "v1"); db.compactMemTable(); assertEquals(db.get("foo"), "v1"); } - @Test - public void testGetSnapshot() + @Test(dataProvider = "options") + public void testGetSnapshot(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); // Try with both a short key and a long key for (int i = 0; i < 2; i++) { @@ -210,11 +284,11 @@ public void testGetSnapshot() } } - @Test - public void testGetLevel0Ordering() + @Test(dataProvider = "options") + public void testGetLevel0Ordering(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); // Check that we process level-0 files in correct order. 
The code // below generates two level-0 files where the earlier one comes @@ -228,11 +302,11 @@ public void testGetLevel0Ordering() assertEquals(db.get("foo"), "v2"); } - @Test - public void testGetOrderedByLevels() + @Test(dataProvider = "options") + public void testGetOrderedByLevels(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "v1"); db.compact("a", "z"); assertEquals(db.get("foo"), "v1"); @@ -242,11 +316,11 @@ public void testGetOrderedByLevels() assertEquals(db.get("foo"), "v2"); } - @Test - public void testGetPicksCorrectFile() + @Test(dataProvider = "options") + public void testGetPicksCorrectFile(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("a", "va"); db.compact("a", "b"); db.put("x", "vx"); @@ -260,40 +334,50 @@ public void testGetPicksCorrectFile() } - @Test - public void testEmptyIterator() + //TODO implement GetEncountersEmptyLevel + + @Test(dataProvider = "options") + public void testEmptyIterator(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); - SeekingIterator iterator = db.iterator(); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); + StringDbIterator iterator = db.iterator(); iterator.seekToFirst(); assertNoNextElement(iterator); iterator.seek("foo"); assertNoNextElement(iterator); + iterator.close(); } - @Test - public void testIteratorSingle() + @Test(dataProvider = "options") + public void testIteratorSingle(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("a", "va"); - assertSequence(db.iterator(), immutableEntry("a", "va")); + StringDbIterator iterator = db.iterator(); + assertSequence(iterator, immutableEntry("a", "va")); + iterator.close(); } - @Test - public void testIteratorMultiple() + @Test(dataProvider = "options") + public void testIteratorMultiple(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("a", "va"); db.put("b", "vb"); db.put("c", "vc"); - SeekingIterator iterator = db.iterator(); + StringDbIterator iterator = db.iterator(); + assertSequence(iterator, + immutableEntry("a", "va"), + immutableEntry("b", "vb"), + immutableEntry("c", "vc")); + iterator.seekToFirst(); assertSequence(iterator, immutableEntry("a", "va"), immutableEntry("b", "vb"), @@ -310,13 +394,53 @@ public void testIteratorMultiple() immutableEntry("a", "va"), immutableEntry("b", "vb"), immutableEntry("c", "vc")); + iterator.close(); } - @Test - public void testRecover() + @Test(dataProvider = "options") + public void testIterSmallAndLargeMix(final String desc, final Options options) + throws IOException + { + DbStringWrapper db = new DbStringWrapper(options, databaseDir); + db.put("a", "va"); + db.put("b", Strings.repeat("b", 100000)); + db.put("c", "vc"); + db.put("d", Strings.repeat("d", 100000)); + db.put("e", Strings.repeat("e", 100000)); + StringDbIterator iterator = db.iterator(); + assertSequence(iterator, + immutableEntry("a", "va"), + 
immutableEntry("b", Strings.repeat("b", 100000)), + immutableEntry("c", "vc"), + immutableEntry("d", Strings.repeat("d", 100000)), + immutableEntry("e", Strings.repeat("e", 100000))); + iterator.close(); + + } + + @Test(dataProvider = "options") + public void testIterMultiWithDelete(final String desc, final Options options) + throws IOException + { + DbStringWrapper db = new DbStringWrapper(options, databaseDir); + db.put("b", "vb"); + db.put("c", "vc"); + db.put("a", "va"); + db.delete("b"); + assertNull(db.get("b")); + StringDbIterator iterator = db.iterator(); + iterator.seek("c"); + assertSequence(iterator, + immutableEntry("c", "vc") + ); + iterator.close(); + } + + @Test(dataProvider = "options") + public void testRecover(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "v1"); db.put("baz", "v5"); @@ -337,11 +461,11 @@ public void testRecover() } - @Test - public void testRecoveryWithEmptyLog() + @Test(dataProvider = "options") + public void testRecoveryWithEmptyLog(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "v1"); db.put("foo", "v2"); db.reopen(); @@ -351,11 +475,11 @@ public void testRecoveryWithEmptyLog() assertEquals(db.get("foo"), "v3"); } - @Test - public void testRecoverDuringMemtableCompaction() + @Test(dataProvider = "options") + public void testRecoverDuringMemtableCompaction(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options().writeBufferSize(1000000), databaseDir); + DbStringWrapper db = new DbStringWrapper(options.writeBufferSize(1000000), databaseDir); // Trigger a long memtable compaction and reopen the database during it db.put("foo", "v1"); // Goes to 1st log file @@ -371,11 +495,11 @@ public void testRecoverDuringMemtableCompaction() } - @Test - public void testMinorCompactionsHappen() + @Test(dataProvider = "options") + public void testMinorCompactionsHappen(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options().writeBufferSize(10000), databaseDir); + DbStringWrapper db = new DbStringWrapper(options.writeBufferSize(10000), databaseDir); int n = 500; int startingNumTables = db.totalTableFiles(); @@ -401,18 +525,18 @@ public void testMinorCompactionsHappen() } - @Test - public void testRecoverWithLargeLog() + @Test(dataProvider = "options") + public void testRecoverWithLargeLog(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("big1", longString(200000, '1')); db.put("big2", longString(200000, '2')); db.put("small3", longString(10, '3')); db.put("small4", longString(10, '4')); assertEquals(db.numberOfFilesInLevel(0), 0); - db.reopen(new Options().writeBufferSize(100000)); + db.reopen(options.writeBufferSize(100000)); assertEquals(db.numberOfFilesInLevel(0), 3); assertEquals(db.get("big1"), longString(200000, '1')); assertEquals(db.get("big2"), longString(200000, '2')); @@ -421,11 +545,11 @@ public void testRecoverWithLargeLog() assertTrue(db.numberOfFilesInLevel(0) > 1); } - @Test - public void testCompactionsGenerateMultipleFiles() + @Test(dataProvider = 
"options") + public void testCompactionsGenerateMultipleFiles(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options().writeBufferSize(100000000), databaseDir); + DbStringWrapper db = new DbStringWrapper(options.writeBufferSize(100000000), databaseDir); // Write 8MB (80 values, each 100K) assertEquals(db.numberOfFilesInLevel(0), 0); @@ -451,11 +575,11 @@ public void testCompactionsGenerateMultipleFiles() } } - @Test - public void testRepeatedWritesToSameKey() + @Test(dataProvider = "options") + public void testRepeatedWritesToSameKey(final String desc, final Options options) throws Exception { - Options options = new Options().writeBufferSize(100000); + options.writeBufferSize(100000); DbStringWrapper db = new DbStringWrapper(options, databaseDir); // We must have at most one file per level except for level-0, @@ -472,8 +596,8 @@ public void testRepeatedWritesToSameKey() db.close(); } - @Test - public void testSparseMerge() + @Test(dataProvider = "options") + public void testSparseMerge(final String desc, final Options options) throws Exception { DbStringWrapper db = new DbStringWrapper(new Options().compressionType(NONE), databaseDir); @@ -591,14 +715,14 @@ public void testApproximateSizesMixOfSmallAndLarge() } } - @Test - public void testIteratorPinsRef() + @Test(dataProvider = "options") + public void testIteratorPinsRef(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "hello"); - SeekingIterator iterator = db.iterator(); + StringDbIterator iterator = db.iterator(); db.put("foo", "newvalue1"); for (int i = 0; i < 100; i++) { @@ -607,13 +731,14 @@ public void testIteratorPinsRef() db.put("foo", "newvalue1"); assertSequence(iterator, immutableEntry("foo", "hello")); + iterator.close(); } - @Test - public void testSnapshot() + @Test(dataProvider = "options") + public void testSnapshot(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "v1"); Snapshot s1 = db.getSnapshot(); db.put("foo", "v2"); @@ -641,11 +766,11 @@ public void testSnapshot() assertEquals("v4", db.get("foo")); } - @Test - public void testHiddenValuesAreRemoved() + @Test(dataProvider = "options") + public void testHiddenValuesAreRemoved(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); Random random = new Random(301); fillLevels(db, "a", "z"); @@ -676,10 +801,78 @@ public void testHiddenValuesAreRemoved() } @Test - public void testDeletionMarkers1() + public void testDeleteEntriesShouldNotAbeamOnIteration() throws Exception + { + DbStringWrapper db = new DbStringWrapper(new Options().createIfMissing(true), databaseDir); + db.put("b", "v"); + db.delete("b"); + db.delete("a"); + assertEquals("[]", toString(db)); + } + + @Test + public void testL0CompactionGoogleBugIssue44a() throws Exception + { + DbStringWrapper db = new DbStringWrapper(new Options().createIfMissing(true), databaseDir); + db.reopen(); + db.put("b", "v"); + db.reopen(); + db.delete("b"); + db.delete("a"); + db.reopen(); + db.delete("a"); + db.reopen(); + db.put("a", "v"); + db.reopen(); + db.reopen(); + assertEquals("[a=v]", 
toString(db)); + Thread.sleep(1000); // Wait for compaction to finish + assertEquals("[a=v]", toString(db)); + } + + private String toString(DbStringWrapper db) + { + StringDbIterator iterator = db.iterator(); + String s = Iterators.toString(iterator); + iterator.close(); + return s; + } + + @Test + public void testL0CompactionGoogleBugIssue44b() throws Exception + { + DbStringWrapper db = new DbStringWrapper(new Options().createIfMissing(true), databaseDir); + db.reopen(); + db.put("", ""); + db.reopen(); + db.delete("e"); + db.put("", ""); + db.reopen(); + db.put("c", "cv"); + db.reopen(); + assertEquals("[=, c=cv]", toString(db)); + db.put("", ""); + db.reopen(); + db.put("", ""); + Thread.sleep(1000); // Wait for compaction to finish + db.reopen(); + db.put("d", "dv"); + db.reopen(); + db.put("", ""); + db.reopen(); + db.delete("d"); + db.delete("b"); + db.reopen(); + assertEquals("[=, c=cv]", toString(db)); + Thread.sleep(1000); // Wait for compaction to finish + assertEquals("[=, c=cv]", toString(db)); + } + + @Test(dataProvider = "options") + public void testDeletionMarkers1(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "v1"); db.compactMemTable(); @@ -699,7 +892,8 @@ public void testDeletionMarkers1() db.delete("foo"); db.put("foo", "v2"); - assertEquals(db.allEntriesFor("foo"), asList("v2", "DEL", "v1")); + final List foo = db.allEntriesFor("foo"); + assertEquals(foo, asList("v2", "DEL", "v1")); db.compactMemTable(); // Moves to level last-2 assertEquals(db.get("a"), "begin"); assertEquals(db.get("foo"), "v2"); @@ -718,11 +912,11 @@ public void testDeletionMarkers1() assertEquals(db.allEntriesFor("foo"), asList("v2")); } - @Test - public void testDeletionMarkers2() + @Test(dataProvider = "options") + public void testDeletionMarkers2(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); db.put("foo", "v1"); db.compactMemTable(); @@ -753,27 +947,27 @@ public void testDeletionMarkers2() assertEquals(db.allEntriesFor("foo"), asList()); } - @Test - public void testEmptyDb() + @Test(dataProvider = "options") + public void testEmptyDb(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); testDb(db); } - @Test - public void testSingleEntrySingle() + @Test(dataProvider = "options") + public void testSingleEntrySingle(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); testDb(db, immutableEntry("name", "dain sundstrom")); } - @Test - public void testMultipleEntries() + @Test(dataProvider = "options") + public void testMultipleEntries(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); List> entries = asList( immutableEntry("beer/ale", "Lagunitas Little Sumpin’ Sumpin’"), @@ -786,11 +980,11 @@ public void testMultipleEntries() testDb(db, entries); } - @Test - public void testMultiPassMultipleEntries() + @Test(dataProvider = "options") + public void 
testMultiPassMultipleEntries(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options(), databaseDir); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); List> entries = asList( immutableEntry("beer/ale", "Lagunitas Little Sumpin’ Sumpin’"), @@ -805,7 +999,8 @@ public void testMultiPassMultipleEntries() } } - @Test(expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Database directory '" + DOES_NOT_EXIST_FILENAME_PATTERN + "'.*") + //TODO this test may fail in windows. a path that also fails in windows must be found + @Test(enabled = false, expectedExceptions = IllegalArgumentException.class, expectedExceptionsMessageRegExp = "Database directory '" + DOES_NOT_EXIST_FILENAME_PATTERN + "'.*") public void testCantCreateDirectoryReturnMessage() throws Exception { @@ -833,11 +1028,11 @@ public void testSymbolicLinkForFileWithParent() assertFalse(FileUtils.isSymbolicLink(new File(DOES_NOT_EXIST_FILENAME, "db"))); } - @Test - public void testCustomComparator() + @Test(dataProvider = "options") + public void testCustomComparator(final String desc, final Options options) throws Exception { - DbStringWrapper db = new DbStringWrapper(new Options().comparator(new ReverseDBComparator()), databaseDir); + DbStringWrapper db = new DbStringWrapper(options.comparator(new LexicographicalReverseDBComparator()), databaseDir); List> entries = asList( immutableEntry("scotch/strong", "Lagavulin"), @@ -852,7 +1047,7 @@ public void testCustomComparator() db.put(entry.getKey(), entry.getValue()); } - SeekingIterator seekingIterator = db.iterator(); + StringDbIterator seekingIterator = db.iterator(); for (Entry entry : entries) { assertTrue(seekingIterator.hasNext()); assertEquals(seekingIterator.peek(), entry); @@ -860,6 +1055,72 @@ public void testCustomComparator() } assertFalse(seekingIterator.hasNext()); + seekingIterator.close(); + } + + @Test(dataProvider = "options") + public void testManualCompaction(final String desc, final Options options) throws Exception + { + assertEquals(DbConstants.MAX_MEM_COMPACT_LEVEL, 2); + DbStringWrapper db = new DbStringWrapper(options, databaseDir); + makeTables(db, 3, "p", "q"); + assertEquals("1,1,1", filesPerLevel(db.db)); + + // Compaction range falls before files + db.compactRange("", "c"); + assertEquals("1,1,1", filesPerLevel(db.db)); + + // Compaction range falls after files + db.compactRange("r", "z"); + assertEquals("1,1,1", filesPerLevel(db.db)); + + // Compaction range overlaps files + db.compactRange("p1", "p9"); + assertEquals("0,0,1", filesPerLevel(db.db)); + + // Populate a different range + makeTables(db, 3, "c", "e"); + assertEquals("1,1,2", filesPerLevel(db.db)); + + // Compact just the new range + db.compactRange("b", "f"); + assertEquals("0,0,2", filesPerLevel(db.db)); + + // Compact all + makeTables(db, 1, "a", "z"); + assertEquals("0,1,2", filesPerLevel(db.db)); + db.compactRange(null, null); + assertEquals("0,0,1", filesPerLevel(db.db)); + } + + // Do n memtable compactions, each of which produces an sstable + // covering the range [small,large]. 
+ private void makeTables(DbStringWrapper db, int n, String small, String large) + { + for (int i = 0; i < n; i++) { + db.put(small, "begin"); + db.put(large, "end"); + db.db.testCompactMemTable(); + } + } + + // Return spread of files per level + private String filesPerLevel(DbImpl db) + { + StringBuilder result = new StringBuilder(); + int lastNonZeroOffset = 0; + for (int level = 0; level < DbConstants.NUM_LEVELS; level++) { + int f = db.numberOfFilesInLevel(level); + if (result.length() > 0) { + result.append(","); + } + result.append(f); + if (f > 0) { + lastNonZeroOffset = result.length(); + } + } + result.setLength(lastNonZeroOffset); + return result.toString(); } @SafeVarargs @@ -881,7 +1142,7 @@ private void testDb(DbStringWrapper db, List> entries) assertEquals(actual, entry.getValue(), "Key: " + entry.getKey()); } - SeekingIterator seekingIterator = db.iterator(); + StringDbIterator seekingIterator = db.iterator(); assertSequence(seekingIterator, entries); seekingIterator.seekToFirst(); @@ -901,7 +1162,8 @@ private void testDb(DbStringWrapper db, List> entries) Slice endKey = Slices.wrappedBuffer(new byte[] {(byte) 0xFF, (byte) 0xFF, (byte) 0xFF, (byte) 0xFF}); seekingIterator.seek(endKey.toString(UTF_8)); - assertSequence(seekingIterator, Collections.>emptyList()); + assertSequence(seekingIterator, Collections.emptyList()); + seekingIterator.close(); } @BeforeMethod @@ -980,16 +1242,12 @@ private boolean between(long size, long left, long right) private void fillLevels(DbStringWrapper db, String smallest, String largest) { - for (int level = 0; level < NUM_LEVELS; level++) { - db.put(smallest, "begin"); - db.put(largest, "end"); - db.compactMemTable(); - } + makeTables(db, NUM_LEVELS, smallest, largest); } private final ArrayList opened = new ArrayList<>(); - private static class ReverseDBComparator + private static class LexicographicalReverseDBComparator implements DBComparator { @Override @@ -1015,7 +1273,7 @@ public byte[] findShortestSeparator(byte[] start, byte[] limit) if (sharedBytes < Math.min(start.length, limit.length)) { // if we can add one to the last shared byte without overflow and the two keys differ by more than // one increment at this location. 
- int lastSharedByte = start[sharedBytes]; + int lastSharedByte = start[sharedBytes] & 0xff; if (lastSharedByte < 0xff && lastSharedByte + 1 < limit[sharedBytes]) { byte[] result = Arrays.copyOf(start, sharedBytes + 1); result[sharedBytes] = (byte) (lastSharedByte + 1); @@ -1048,7 +1306,7 @@ private int calculateSharedBytes(byte[] leftKey, byte[] rightKey) int sharedKeyBytes = 0; if (leftKey != null && rightKey != null) { - int minSharedKeyBytes = Ints.min(leftKey.length, rightKey.length); + int minSharedKeyBytes = Math.min(leftKey.length, rightKey.length); while (sharedKeyBytes < minSharedKeyBytes && leftKey[sharedKeyBytes] == rightKey[sharedKeyBytes]) { sharedKeyBytes++; } @@ -1069,7 +1327,7 @@ private DbStringWrapper(Options options, File databaseDir) { this.options = options.verifyChecksums(true).createIfMissing(true).errorIfExists(true); this.databaseDir = databaseDir; - this.db = new DbImpl(options, databaseDir); + this.db = new DbImpl(options, databaseDir, new EnvImpl()); opened.add(this); } @@ -1101,7 +1359,7 @@ public void delete(String key) db.delete(toByteArray(key)); } - public SeekingIterator iterator() + public StringDbIterator iterator() { return new StringDbIterator(db.iterator()); } @@ -1121,9 +1379,14 @@ public void compactMemTable() db.flushMemTable(); } + public void compactRange(String start, String limit) + { + db.compactRange(start == null ? null : Slices.copiedBuffer(start, UTF_8).getBytes(), limit == null ? null : Slices.copiedBuffer(limit, UTF_8).getBytes()); + } + public void compactRange(int level, String start, String limit) { - db.compactRange(level, Slices.copiedBuffer(start, UTF_8), Slices.copiedBuffer(limit, UTF_8)); + db.compactRange(level, start == null ? null : Slices.copiedBuffer(start, UTF_8), limit == null ? null : Slices.copiedBuffer(limit, UTF_8)); } public void compact(String start, String limit) @@ -1175,13 +1438,15 @@ public void reopen(Options options) throws IOException { db.close(); - db = new DbImpl(options.verifyChecksums(true).createIfMissing(false).errorIfExists(false), databaseDir); + db = new DbImpl(options.verifyChecksums(true).createIfMissing(false).errorIfExists(false), databaseDir, new EnvImpl()); } private List allEntriesFor(String userKey) { ImmutableList.Builder result = ImmutableList.builder(); - for (Entry entry : db.internalIterable()) { + DbIterator iterator = db.internalIterator(); + while (iterator.hasNext()) { + Entry entry = iterator.next(); String entryKey = entry.getKey().getUserKey().toString(UTF_8); if (entryKey.equals(userKey)) { if (entry.getKey().getValueType() == ValueType.VALUE) { @@ -1192,13 +1457,14 @@ private List allEntriesFor(String userKey) } } } + iterator.close(); return result.build(); } } private static class StringDbIterator - implements SeekingIterator + implements SeekingIterator, Closeable { private final DBIterator iterator; @@ -1225,6 +1491,12 @@ public void seek(String targetKey) iterator.seek(targetKey.getBytes(UTF_8)); } + @Override + public void close() + { + iterator.close(); + } + @Override public Entry peek() { diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/LogTest.java b/leveldb/src/test/java/org/iq80/leveldb/impl/LogTest.java index da36cf93..78485aff 100644 --- a/leveldb/src/test/java/org/iq80/leveldb/impl/LogTest.java +++ b/leveldb/src/test/java/org/iq80/leveldb/impl/LogTest.java @@ -18,6 +18,9 @@ package org.iq80.leveldb.impl; import com.google.common.collect.ImmutableList; +import org.iq80.leveldb.util.SequentialFile; +import org.iq80.leveldb.util.SequentialFileImpl; +import 
org.iq80.leveldb.util.Closeables; import org.iq80.leveldb.util.Slice; import org.iq80.leveldb.util.SliceOutput; import org.iq80.leveldb.util.Slices; @@ -26,9 +29,7 @@ import org.testng.annotations.Test; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; -import java.nio.channels.FileChannel; import java.util.List; import static java.nio.charset.StandardCharsets.UTF_8; @@ -55,6 +56,7 @@ public void corruption(long bytes, Throwable reason) }; private LogWriter writer; + private File tempFile; @Test public void testEmptyBlock() @@ -139,9 +141,8 @@ private void testLog(List records, boolean closeWriter) // test readRecord - try (FileInputStream fis = new FileInputStream(writer.getFile()); - FileChannel fileChannel = fis.getChannel()) { - LogReader reader = new LogReader(fileChannel, NO_CORRUPTION_MONITOR, true, 0); + try (SequentialFile in = SequentialFileImpl.open(tempFile)) { + LogReader reader = new LogReader(in, NO_CORRUPTION_MONITOR, true, 0); for (Slice expected : records) { Slice actual = reader.readRecord(); assertEquals(actual, expected); @@ -154,15 +155,18 @@ private void testLog(List records, boolean closeWriter) public void setUp() throws Exception { - writer = Logs.createLogWriter(File.createTempFile("table", ".log"), 42); + tempFile = File.createTempFile("table", ".log"); + writer = Logs.createLogWriter(tempFile, 42, Iq80DBFactory.USE_MMAP); } @AfterMethod public void tearDown() - throws Exception { if (writer != null) { - writer.delete(); + Closeables.closeQuietly(writer); + } + if (tempFile != null) { + tempFile.delete(); } } diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/LogWriterTest.java b/leveldb/src/test/java/org/iq80/leveldb/impl/LogWriterTest.java new file mode 100644 index 00000000..3b86f59c --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/impl/LogWriterTest.java @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.iq80.leveldb.impl; + +import org.iq80.leveldb.util.SequentialFile; +import org.iq80.leveldb.util.Slice; +import org.iq80.leveldb.util.SliceOutput; +import org.iq80.leveldb.util.WritableFile; +import org.testng.annotations.Test; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.fail; + +public class LogWriterTest +{ + @Test + public void testLogRecordBounds() + throws Exception + { + StringSink open = new StringSink(); + + int recordSize = LogConstants.BLOCK_SIZE - LogConstants.HEADER_SIZE; + Slice record = new Slice(recordSize); + + LogWriter writer = LogWriter.createWriter(10, open); + writer.addRecord(record, false); + writer.close(); + + LogMonitor logMonitor = new AssertNoCorruptionLogMonitor(); + + try (SequentialFile in = new SequentialBytes(new ByteArrayInputStream(open.sb.toByteArray()))) { + LogReader logReader = new LogReader(in, logMonitor, true, 0); + int count = 0; + for (Slice slice = logReader.readRecord(); slice != null; slice = logReader.readRecord()) { + assertEquals(slice.length(), recordSize); + count++; + } + assertEquals(count, 1); + } + } + + private static class StringSink implements WritableFile + { + private ByteArrayOutputStream sb = new ByteArrayOutputStream(); + + byte[] content; + + @Override + public void append(Slice data) throws IOException + { + sb.write(data.getBytes()); + } + + @Override + public void force() + { + content = sb.toByteArray(); + } + + @Override + public void close() throws IOException + { + content = sb.toByteArray(); + sb.close(); + } + } + + private static class SequentialBytes implements SequentialFile + { + private ByteArrayInputStream in; + + public SequentialBytes(ByteArrayInputStream in) + { + this.in = in; + } + + @Override + public void skip(long n) + { + assertEquals(in.skip(n), n); + } + + @Override + public int read(int atMost, SliceOutput destination) throws IOException + { + return destination.writeBytes(in, atMost); + } + + @Override + public void close() throws IOException + { + in.close(); + } + } + + private static class AssertNoCorruptionLogMonitor + implements LogMonitor + { + @Override + public void corruption(long bytes, String reason) + { + fail("corruption at " + bytes + " reason: " + reason); + } + + @Override + public void corruption(long bytes, Throwable reason) + { + fail("corruption at " + bytes + " reason: " + reason); + } + } +} diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/MemTableTest.java b/leveldb/src/test/java/org/iq80/leveldb/impl/MemTableTest.java new file mode 100644 index 00000000..de3fa68d --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/impl/MemTableTest.java @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.iq80.leveldb.impl; + +import org.iq80.leveldb.table.BytewiseComparator; +import org.testng.annotations.Test; + +import static org.iq80.leveldb.util.TestUtils.asciiToBytes; +import static org.iq80.leveldb.util.TestUtils.asciiToSlice; +import static org.testng.Assert.assertEquals; + +public class MemTableTest +{ + @Test + public void testTestSimple() throws Exception + { + InternalKeyComparator cmp = new InternalKeyComparator(new BytewiseComparator()); + final MemTable memtable = new MemTable(cmp); + WriteBatchImpl batch = new WriteBatchImpl(); + batch.put(asciiToBytes("k1"), asciiToBytes("v1")); + batch.put(asciiToBytes("k1"), asciiToBytes("v1")); + batch.put(asciiToBytes("k2"), asciiToBytes("v2")); + batch.put(asciiToBytes("k3"), asciiToBytes("v3")); + batch.put(asciiToBytes("largekey"), asciiToBytes("vlarge")); + batch.forEach(new InsertIntoHandler(memtable, 100)); + final MemTable.MemTableIterator iter = memtable.iterator(); + iter.seekToFirst(); + assertEquals(new InternalEntry(new InternalKey(asciiToSlice("k1"), 101, ValueType.VALUE), asciiToSlice("v1")), iter.next()); + assertEquals(new InternalEntry(new InternalKey(asciiToSlice("k1"), 100, ValueType.VALUE), asciiToSlice("v1")), iter.next()); + assertEquals(new InternalEntry(new InternalKey(asciiToSlice("k2"), 102, ValueType.VALUE), asciiToSlice("v2")), iter.next()); + assertEquals(new InternalEntry(new InternalKey(asciiToSlice("k3"), 103, ValueType.VALUE), asciiToSlice("v3")), iter.next()); + assertEquals(new InternalEntry(new InternalKey(asciiToSlice("largekey"), 104, ValueType.VALUE), asciiToSlice("vlarge")), iter.next()); + } +} diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/NativeInteropTest.java b/leveldb/src/test/java/org/iq80/leveldb/impl/NativeInteropTest.java index 60ff551f..a9e4f28f 100755 --- a/leveldb/src/test/java/org/iq80/leveldb/impl/NativeInteropTest.java +++ b/leveldb/src/test/java/org/iq80/leveldb/impl/NativeInteropTest.java @@ -24,6 +24,7 @@ import org.iq80.leveldb.ReadOptions; import org.iq80.leveldb.WriteOptions; import org.iq80.leveldb.util.FileUtils; +import org.testng.annotations.AfterMethod; import org.testng.annotations.Test; import java.io.File; @@ -140,4 +141,10 @@ public void crud(DBFactory firstFactory, DBFactory secondFactory) db.close(); } + + @AfterMethod + public void tearDown() throws Exception + { + FileUtils.deleteRecursively(databaseDir); + } } diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/TestFileChannelLogWriter.java b/leveldb/src/test/java/org/iq80/leveldb/impl/TestFileChannelLogWriter.java deleted file mode 100644 index be2f236a..00000000 --- a/leveldb/src/test/java/org/iq80/leveldb/impl/TestFileChannelLogWriter.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2011 the original author or authors. - * See the notice.md file distributed with this work for additional - * information regarding copyright ownership. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.iq80.leveldb.impl; - -import org.iq80.leveldb.util.Slice; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.FileInputStream; -import java.nio.channels.FileChannel; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.fail; - -public class TestFileChannelLogWriter -{ - @Test - public void testLogRecordBounds() - throws Exception - { - File file = File.createTempFile("test", ".log"); - try { - int recordSize = LogConstants.BLOCK_SIZE - LogConstants.HEADER_SIZE; - Slice record = new Slice(recordSize); - - LogWriter writer = new FileChannelLogWriter(file, 10); - writer.addRecord(record, false); - writer.close(); - - LogMonitor logMonitor = new AssertNoCorruptionLogMonitor(); - - try (FileInputStream fis = new FileInputStream(file); - FileChannel channel = fis.getChannel()) { - LogReader logReader = new LogReader(channel, logMonitor, true, 0); - int count = 0; - for (Slice slice = logReader.readRecord(); slice != null; slice = logReader.readRecord()) { - assertEquals(slice.length(), recordSize); - count++; - } - assertEquals(count, 1); - } - } - finally { - file.delete(); - } - } - - private static class AssertNoCorruptionLogMonitor - implements LogMonitor - { - @Override - public void corruption(long bytes, String reason) - { - fail("corruption at " + bytes + " reason: " + reason); - } - - @Override - public void corruption(long bytes, Throwable reason) - { - fail("corruption at " + bytes + " reason: " + reason); - } - } -} diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/TestMMapLogWriter.java b/leveldb/src/test/java/org/iq80/leveldb/impl/TestMMapLogWriter.java deleted file mode 100644 index a7e7a8d5..00000000 --- a/leveldb/src/test/java/org/iq80/leveldb/impl/TestMMapLogWriter.java +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (C) 2011 the original author or authors. - * See the notice.md file distributed with this work for additional - * information regarding copyright ownership. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.iq80.leveldb.impl; - -import org.iq80.leveldb.util.Slice; -import org.testng.annotations.Test; - -import java.io.File; -import java.io.FileInputStream; -import java.nio.channels.FileChannel; - -import static org.testng.Assert.assertEquals; -import static org.testng.Assert.fail; - -public class TestMMapLogWriter -{ - @Test - public void testLogRecordBounds() - throws Exception - { - File file = File.createTempFile("test", ".log"); - try { - int recordSize = LogConstants.BLOCK_SIZE - LogConstants.HEADER_SIZE; - Slice record = new Slice(recordSize); - - LogWriter writer = new MMapLogWriter(file, 10); - writer.addRecord(record, false); - writer.close(); - - LogMonitor logMonitor = new AssertNoCorruptionLogMonitor(); - - FileChannel channel = new FileInputStream(file).getChannel(); - - LogReader logReader = new LogReader(channel, logMonitor, true, 0); - - int count = 0; - for (Slice slice = logReader.readRecord(); slice != null; slice = logReader.readRecord()) { - assertEquals(slice.length(), recordSize); - count++; - } - assertEquals(count, 1); - } - finally { - file.delete(); - } - } - - private static class AssertNoCorruptionLogMonitor - implements LogMonitor - { - @Override - public void corruption(long bytes, String reason) - { - fail("corruption at " + bytes + " reason: " + reason); - } - - @Override - public void corruption(long bytes, Throwable reason) - { - fail("corruption at " + bytes + " reason: " + reason); - } - } -} diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/VersionEditTest.java b/leveldb/src/test/java/org/iq80/leveldb/impl/VersionEditTest.java new file mode 100644 index 00000000..8cb579c6 --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/impl/VersionEditTest.java @@ -0,0 +1,57 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.iq80.leveldb.impl; + +import org.iq80.leveldb.util.Slice; +import org.testng.annotations.Test; + +import static org.iq80.leveldb.util.TestUtils.asciiToSlice; +import static org.testng.Assert.assertEquals; + +public class VersionEditTest +{ + @Test + public void testEncodeDecode() throws Exception + { + long kBig = 1L << 50; + + VersionEdit edit = new VersionEdit(); + for (int i = 0; i < 4; i++) { + testEncodeDecode(edit); + edit.addFile(3, kBig + 300 + i, kBig + 400 + i, + new InternalKey(asciiToSlice("foo"), kBig + 500 + i, ValueType.VALUE), + new InternalKey(asciiToSlice("zoo"), kBig + 600 + i, ValueType.DELETION)); + edit.deleteFile(4, kBig + 700 + i); + edit.setCompactPointer(i, new InternalKey(asciiToSlice("x"), kBig + 900 + i, ValueType.VALUE)); + } + + edit.setComparatorName("foo"); + edit.setLogNumber(kBig + 100); + edit.setNextFileNumber(kBig + 200); + edit.setLastSequenceNumber(kBig + 1000); + testEncodeDecode(edit); + } + + void testEncodeDecode(VersionEdit edit) + { + Slice encoded = edit.encode(); + VersionEdit parsed = new VersionEdit(encoded); + Slice encoded2 = parsed.encode(); + assertEquals(encoded, encoded2); + } +} diff --git a/leveldb/src/test/java/org/iq80/leveldb/impl/VersionSetTest.java b/leveldb/src/test/java/org/iq80/leveldb/impl/VersionSetTest.java new file mode 100644 index 00000000..4a190218 --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/impl/VersionSetTest.java @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.iq80.leveldb.impl; + +import org.iq80.leveldb.Options; +import org.iq80.leveldb.table.BytewiseComparator; +import org.iq80.leveldb.util.Slice; +import org.iq80.leveldb.util.TestUtils; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + +public class VersionSetTest +{ + private List files = new ArrayList<>(); + + @BeforeMethod + public void setUp() throws Exception + { + files.clear(); + } + + void add(String smallest, String largest, + long smallestSeq, + long largestSeq) + { + files.add(new FileMetaData(files.size() + 1, 0, new InternalKey(TestUtils.asciiToSlice(smallest), smallestSeq, ValueType.VALUE), new InternalKey(TestUtils.asciiToSlice(largest), largestSeq, ValueType.VALUE))); + } + + int find(String key) + { + InternalKey target = new InternalKey(TestUtils.asciiToSlice(key), 100, ValueType.VALUE); + return newLevel().findFile(target); + } + + private Level newLevel() + { + InternalKeyComparator internalKeyComparator = new InternalKeyComparator(new BytewiseComparator()); + return new Level(1, files, new TableCache(new File("xxxxxxxxxxx"), 0, new BytewiseComparator(), new Options()), internalKeyComparator); + } + + boolean overlaps(String smallest, String largest) + { + Slice s = smallest != null ? TestUtils.asciiToSlice(smallest) : null; + Slice l = largest != null ? TestUtils.asciiToSlice(largest) : null; + return newLevel().someFileOverlapsRange(true, s, l); + } + + @Test + public void testEmpty() throws Exception + { + assertEquals(find("foo"), 0); + assertFalse(overlaps("z", "a")); + assertFalse(overlaps("z", null)); + assertFalse(overlaps("a", null)); + assertFalse(overlaps(null, null)); + } + + @Test + public void testSingle() throws Exception + { + add("p", "q", 100, 100); + assertEquals(find("a"), 0); + assertEquals(find("p"), 0); + assertEquals(find("pl"), 0); + assertEquals(find("q"), 0); + assertEquals(find("ql"), 1); + assertEquals(find("z"), 1); + + assertTrue(!overlaps("a", "b")); + assertTrue(!overlaps("z1", "z2")); + assertTrue(overlaps("a", "p")); + assertTrue(overlaps("a", "q")); + assertTrue(overlaps("a", "z")); + assertTrue(overlaps("p", "p1")); + assertTrue(overlaps("p", "q")); + assertTrue(overlaps("p", "z")); + assertTrue(overlaps("p1", "p2")); + assertTrue(overlaps("p1", "z")); + assertTrue(overlaps("q", "q")); + assertTrue(overlaps("q", "q1")); + + assertTrue(!overlaps(null, "j")); + assertTrue(!overlaps("r", null)); + assertTrue(overlaps(null, "p")); + assertTrue(overlaps(null, "p1")); + assertTrue(overlaps("q", null)); + assertTrue(overlaps(null, null)); + } + + @Test + public void testMultiple() throws Exception + { + add("150", "200", 100, 100); + add("200", "250", 100, 100); + add("300", "350", 100, 100); + add("400", "450", 100, 100); + assertEquals(0, find("100")); + assertEquals(0, find("150")); + assertEquals(0, find("151")); + assertEquals(0, find("199")); + assertEquals(0, find("200")); + assertEquals(1, find("201")); + assertEquals(1, find("249")); + assertEquals(1, find("250")); + assertEquals(2, find("251")); + assertEquals(2, find("299")); + assertEquals(2, find("300")); + assertEquals(2, find("349")); + assertEquals(2, find("350")); + assertEquals(3, find("351")); + assertEquals(3, find("400")); + assertEquals(3, find("450")); + assertEquals(4, find("451")); + + 
assertTrue(!overlaps("100", "149")); + assertTrue(!overlaps("251", "299")); + assertTrue(!overlaps("451", "500")); + assertTrue(!overlaps("351", "399")); + + assertTrue(overlaps("100", "150")); + assertTrue(overlaps("100", "200")); + assertTrue(overlaps("100", "300")); + assertTrue(overlaps("100", "400")); + assertTrue(overlaps("100", "500")); + assertTrue(overlaps("375", "400")); + assertTrue(overlaps("450", "450")); + assertTrue(overlaps("450", "500")); + } + + @Test + public void testMultipleNullBoundaries() throws Exception + { + add("150", "200", 100, 100); + add("200", "250", 100, 100); + add("300", "350", 100, 100); + add("400", "450", 100, 100); + assertTrue(!overlaps(null, "149")); + assertTrue(!overlaps("451", null)); + assertTrue(overlaps(null, null)); + assertTrue(overlaps(null, "150")); + assertTrue(overlaps(null, "199")); + assertTrue(overlaps(null, "200")); + assertTrue(overlaps(null, "201")); + assertTrue(overlaps(null, "400")); + assertTrue(overlaps(null, "800")); + assertTrue(overlaps("100", null)); + assertTrue(overlaps("200", null)); + assertTrue(overlaps("449", null)); + assertTrue(overlaps("450", null)); + } + + @Test + public void testOverlapSequenceChecks() throws Exception + { + add("200", "200", 5000, 3000); + assertTrue(!overlaps("199", "199")); + assertTrue(!overlaps("201", "300")); + assertTrue(overlaps("200", "200")); + assertTrue(overlaps("190", "200")); + assertTrue(overlaps("200", "210")); + } +} diff --git a/leveldb/src/test/java/org/iq80/leveldb/table/BloomFilterPolicyTest.java b/leveldb/src/test/java/org/iq80/leveldb/table/BloomFilterPolicyTest.java new file mode 100644 index 00000000..b14b3c50 --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/table/BloomFilterPolicyTest.java @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.iq80.leveldb.table;
+
+import org.iq80.leveldb.util.Slice;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.nio.charset.Charset;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * @author Honore Vasconcelos
+ */
+public class BloomFilterPolicyTest
+{
+    public static final int BLOOM_BITS = 10;
+
+    private byte[] filter = new byte[0];
+    private List<byte[]> keys = new ArrayList<>();
+    private BloomFilterPolicy policy = new BloomFilterPolicy(BLOOM_BITS);
+
+    @Test
+    public void emptyBloom() throws Exception
+    {
+        Assert.assertTrue(!matches("hello"));
+        Assert.assertTrue(!matches("world"));
+    }
+
+    @Test
+    public void smallBloom() throws Exception
+    {
+        add("hello");
+        add("world");
+        Assert.assertTrue(matches("hello"), "Key should be found");
+        Assert.assertTrue(matches("world"), "Key should be found");
+        Assert.assertTrue(!matches("x"));
+        Assert.assertTrue(!matches("foo"));
+    }
+
+    @Test
+    public void testVariableLength() throws Exception
+    {
+        // Count number of filters that significantly exceed the false positive rate
+        int mediocreFilters = 0;
+        int goodFilters = 0;
+
+        for (int length = 1; length <= 10000; length = nextLength(length)) {
+            reset();
+            for (int i = 0; i < length; i++) {
+                keys.add(intToBytes(i));
+            }
+            build();
+
+            Assert.assertTrue(filter.length <= (length * BLOOM_BITS / 8) + 40);
+
+            // All added keys must match
+            for (int i = 0; i < length; i++) {
+                Assert.assertTrue(matches(intToBytes(i)));
+            }
+
+            // Check false positive rate
+            double rate = falsePositiveRate();
+            System.err.print(String.format("False positives: %5.2f%% @ length = %6d ; bytes = %6d\n",
+                    rate * 100.0, length, filter.length));
+
+            Assert.assertTrue(rate <= 0.02); // Must not be over 2%
+            if (rate > 0.0125) {
+                mediocreFilters++; // Allowed, but not too often
+            }
+            else {
+                goodFilters++;
+            }
+        }
+        System.err.print(String.format("Filters: %d good, %d mediocre\n",
+                goodFilters, mediocreFilters));
+        Assert.assertTrue(mediocreFilters <= goodFilters / 5);
+    }
+
+    private double falsePositiveRate()
+    {
+        int result = 0;
+        for (int i = 0; i < 10000; i++) {
+            if (matches(intToBytes(i + 1000000000))) {
+                result++;
+            }
+        }
+        return result / 10000.0;
+    }
+
+    private byte[] intToBytes(int value)
+    {
+        // little-endian encoding of the integer
+        byte[] buffer = new byte[4];
+        buffer[0] = (byte) (value);
+        buffer[1] = (byte) (value >>> 8);
+        buffer[2] = (byte) (value >>> 16);
+        buffer[3] = (byte) (value >>> 24);
+        return buffer;
+    }
+
+    private void reset()
+    {
+        keys.clear();
+        filter = new byte[0];
+    }
+
+    private static int nextLength(int length)
+    {
+        if (length < 10) {
+            length += 1;
+        }
+        else if (length < 100) {
+            length += 10;
+        }
+        else if (length < 1000) {
+            length += 100;
+        }
+        else {
+            length += 1000;
+        }
+        return length;
+    }
+
+    private void add(String key)
+    {
+        keys.add(getBytes(key));
+    }
+
+    private boolean matches(String s)
+    {
+        return matches(getBytes(s));
+    }
+
+    private boolean matches(byte[] s)
+    {
+        if (!keys.isEmpty()) {
+            build();
+        }
+        return policy.keyMayMatch(new Slice(s), new Slice(filter));
+    }
+
+    private byte[] getBytes(String s)
+    {
+        return s.getBytes(Charset.forName("ISO-8859-1"));
+    }
+
+    private void build()
+    {
+        List<Slice> keySlices = new ArrayList<>();
+        for (int i = 0; i < keys.size(); i++) {
+            keySlices.add(new Slice(keys.get(i)));
+        }
+        filter = policy.createFilter(keySlices);
+        keys.clear();
+    }
+}
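These tests exercise `BloomFilterPolicy` directly; in normal use the policy is simply handed to the database through `Options.filterPolicy(...)`, as the DbImplTest data provider earlier in this change does. A minimal sketch under that assumption, with a placeholder database path:

```java
import java.io.File;
import java.io.IOException;

import org.iq80.leveldb.DB;
import org.iq80.leveldb.Options;
import org.iq80.leveldb.impl.Iq80DBFactory;
import org.iq80.leveldb.table.BloomFilterPolicy;

public class BloomFilterUsageSketch
{
    public static void main(String[] args) throws IOException
    {
        // ~10 bits per key; the test above requires the observed false
        // positive rate to stay at or below 2%.
        Options options = new Options()
                .createIfMissing(true)
                .filterPolicy(new BloomFilterPolicy(10));
        // "example-db" is a placeholder directory.
        DB db = new Iq80DBFactory().open(new File("example-db"), options);
        try {
            db.put("key".getBytes(), "value".getBytes());
            db.get("key".getBytes()); // point lookups consult the filter to skip table blocks
        }
        finally {
            db.close();
        }
    }
}
```

diff --git a/leveldb/src/test/java/org/iq80/leveldb/table/FilterBlockReaderTest.java b/leveldb/src/test/java/org/iq80/leveldb/table/FilterBlockReaderTest.java
new file 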
diff --git a/leveldb/src/test/java/org/iq80/leveldb/table/FilterBlockReaderTest.java b/leveldb/src/test/java/org/iq80/leveldb/table/FilterBlockReaderTest.java new file mode 100644 index 00000000..d56ae593 --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/table/FilterBlockReaderTest.java @@ -0,0 +1,163 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.table; + +import org.iq80.leveldb.util.DynamicSliceOutput; +import org.iq80.leveldb.util.Hash; +import org.iq80.leveldb.util.Slice; +import org.testng.annotations.Test; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import static org.testng.Assert.assertTrue; + +/** + * @author Honore Vasconcelos + */ +public class FilterBlockReaderTest +{ + @Test + public void testWithBloomFilterPolicy() + { + final FilterBlockBuilder filterBlockBuilder = new FilterBlockBuilder(new BloomFilterPolicy(10)); + filterBlockBuilder.startBlock(189); + for (int i = 0; i < 2000; ++i) { + filterBlockBuilder.addKey(new Slice(String.format("key%06d", i).getBytes())); + } + final Slice finish = filterBlockBuilder.finish(); + final FilterBlockReader reader = new FilterBlockReader(new BloomFilterPolicy(10), finish); + for (int i = 0; i < 2000; ++i) { + assertTrue(reader.keyMayMatch(189, new Slice(String.format("key%06d", i).getBytes()))); + } + }
+ + private static class TestHashFilter implements FilterPolicy + { + @Override + public String name() + { + return "TestHashFilter"; + }
+ + @Override + public byte[] createFilter(List<Slice> keys) + { + final DynamicSliceOutput out = new DynamicSliceOutput(100); + for (Slice key : keys) { + out.writeInt(Hash.hash(key.getRawArray(), key.getRawOffset(), key.length(), 1)); + } + return out.slice().copyBytes(); + }
+ + @Override + public boolean keyMayMatch(Slice key, Slice filter) + { + final int hash = Hash.hash(key.getRawArray(), key.getRawOffset(), key.length(), 1); + for (int i = 0; i + 4 <= filter.length(); i += 4) { + if (hash == filter.getInt(i)) { + return true; + } + } + return false; + } + }
+ + @Test + public void testEmptyBuilder() throws Exception + { + FilterBlockBuilder builder = new FilterBlockBuilder(new TestHashFilter()); + final Slice finish = builder.finish(); + assertTrue(Arrays.equals(finish.copyBytes(), new byte[]{0, 0, 0, 0, 11})); + final FilterBlockReader reader = new FilterBlockReader(new TestHashFilter(), finish); + assertTrue(reader.keyMayMatch(0, new Slice("foo".getBytes()))); + assertTrue(reader.keyMayMatch(100000, new Slice("foo".getBytes()))); + }
+ + @Test + public void testSingleChunk() throws IOException + { + FilterBlockBuilder builder = new FilterBlockBuilder(new TestHashFilter()); + builder.startBlock(100); + builder.addKey(new Slice("foo".getBytes())); + builder.addKey(new Slice("bar".getBytes())); + builder.addKey(new Slice("box".getBytes())); + builder.startBlock(200); + builder.addKey(new Slice("box".getBytes())); + builder.startBlock(300); + builder.addKey(new Slice("hello".getBytes())); + Slice block = builder.finish(); + final FilterBlockReader reader = new FilterBlockReader(new TestHashFilter(), block); + assertTrue(reader.keyMayMatch(100, new Slice("foo".getBytes()))); + assertTrue(reader.keyMayMatch(100, new Slice("bar".getBytes()))); + assertTrue(reader.keyMayMatch(100, new Slice("box".getBytes()))); + assertTrue(reader.keyMayMatch(100, new Slice("hello".getBytes()))); + assertTrue(reader.keyMayMatch(100, new Slice("foo".getBytes()))); + assertTrue(!reader.keyMayMatch(100, new Slice("missing".getBytes()))); + assertTrue(!reader.keyMayMatch(100, new Slice("other".getBytes()))); + }
+ + @Test + public void testMultiChunk() + { + FilterBlockBuilder builder = new FilterBlockBuilder(new TestHashFilter()); + + // First filter + builder.startBlock(0); + builder.addKey(new Slice("foo".getBytes())); + builder.startBlock(2000); + builder.addKey(new Slice("bar".getBytes())); + + // Second filter + builder.startBlock(3100); + builder.addKey(new Slice("box".getBytes())); + + // Third filter is empty + + // Last filter + builder.startBlock(9000); + builder.addKey(new Slice("box".getBytes())); + builder.addKey(new Slice("hello".getBytes())); + + Slice block = builder.finish(); + final FilterBlockReader reader = new FilterBlockReader(new TestHashFilter(), block); + + // Check first filter + assertTrue(reader.keyMayMatch(0, new Slice("foo".getBytes()))); + assertTrue(reader.keyMayMatch(2000, new Slice("bar".getBytes()))); + assertTrue(!reader.keyMayMatch(0, new Slice("box".getBytes()))); + assertTrue(!reader.keyMayMatch(0, new Slice("hello".getBytes()))); + + // Check second filter + assertTrue(reader.keyMayMatch(3100, new Slice("box".getBytes()))); + assertTrue(!reader.keyMayMatch(3100, new Slice("foo".getBytes()))); + assertTrue(!reader.keyMayMatch(3100, new Slice("bar".getBytes()))); + assertTrue(!reader.keyMayMatch(3100, new Slice("hello".getBytes()))); + + // Check third filter (empty) + assertTrue(!reader.keyMayMatch(4100, new Slice("foo".getBytes()))); + assertTrue(!reader.keyMayMatch(4100, new Slice("bar".getBytes()))); + assertTrue(!reader.keyMayMatch(4100, new Slice("box".getBytes()))); + assertTrue(!reader.keyMayMatch(4100, new Slice("hello".getBytes()))); + + // Check last filter + assertTrue(reader.keyMayMatch(9000, new Slice("box".getBytes()))); + assertTrue(reader.keyMayMatch(9000, new Slice("hello".getBytes()))); + assertTrue(!reader.keyMayMatch(9000, new Slice("foo".getBytes()))); + assertTrue(!reader.keyMayMatch(9000, new Slice("bar".getBytes()))); + } +}
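Reviewer note on the constants above, assuming the Java port keeps the C++ filter-block format: `finish()` always appends the 4-byte position of the per-filter offset array, followed by a single "base lg" byte of 11, i.e. one filter per 2^11 = 2048 bytes of data-block offset. That is why an empty builder yields exactly `{0, 0, 0, 0, 11}`, and why `keyMayMatch(blockOffset, key)` selects a filter by shifting the offset. A small sketch (names hypothetical):

```java
// Maps a data-block offset to its filter slot, assuming kFilterBaseLg = 11
// (the trailing byte asserted in testEmptyBuilder above).
public class FilterIndexDemo
{
    static int filterIndex(long blockOffset)
    {
        return (int) (blockOffset >>> 11); // one filter per 2 KiB of block offset
    }

    public static void main(String[] args)
    {
        // Mirrors testMultiChunk: 0 and 2000 share filter 0, 3100 -> filter 1,
        // 4100 -> the empty filter 2, and 9000 -> filter 4.
        for (long offset : new long[] {0, 2000, 3100, 4100, 9000}) {
            System.out.println(offset + " -> filter " + filterIndex(offset));
        }
    }
}
```
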
diff --git a/leveldb/src/test/java/org/iq80/leveldb/table/MMapTableTest.java b/leveldb/src/test/java/org/iq80/leveldb/table/MMRandomInputFileTableTest.java similarity index 68% rename from leveldb/src/test/java/org/iq80/leveldb/table/MMapTableTest.java rename to leveldb/src/test/java/org/iq80/leveldb/table/MMRandomInputFileTableTest.java index 8fd86401..1c2f5fba 100644 --- a/leveldb/src/test/java/org/iq80/leveldb/table/MMapTableTest.java +++ b/leveldb/src/test/java/org/iq80/leveldb/table/MMRandomInputFileTableTest.java @@ -17,19 +17,21 @@ */ package org.iq80.leveldb.table; +import org.iq80.leveldb.util.LRUCache; +import org.iq80.leveldb.util.MMRandomInputFile; import org.iq80.leveldb.util.Slice; +import java.io.File; import java.io.IOException; -import java.nio.channels.FileChannel; import java.util.Comparator; -public class MMapTableTest +public class MMRandomInputFileTableTest extends TableTest { @Override - protected Table createTable(String name, FileChannel fileChannel, Comparator<Slice> comparator, boolean verifyChecksums) + protected Table createTable(File file, Comparator<Slice> comparator, boolean verifyChecksums, FilterPolicy filterPolicy) throws IOException { - return new MMapTable(name, fileChannel, comparator, verifyChecksums); + return new Table(MMRandomInputFile.open(file), comparator, verifyChecksums, new LRUCache<>(8 << 20, new BlockHandleSliceWeigher()), filterPolicy); } }
diff --git a/leveldb/src/test/java/org/iq80/leveldb/table/TableTest.java b/leveldb/src/test/java/org/iq80/leveldb/table/TableTest.java index 7e025d23..cad9b437 100644 --- a/leveldb/src/test/java/org/iq80/leveldb/table/TableTest.java +++ b/leveldb/src/test/java/org/iq80/leveldb/table/TableTest.java @@ -17,41 +17,70 @@ */ package org.iq80.leveldb.table; +import com.google.common.collect.Lists; +import org.iq80.leveldb.CompressionType; +import org.iq80.leveldb.DBComparator; import org.iq80.leveldb.Options; +import org.iq80.leveldb.impl.DbConstants; +import org.iq80.leveldb.impl.DbImpl; +import org.iq80.leveldb.impl.EnvImpl; +import org.iq80.leveldb.impl.InternalEntry; +import org.iq80.leveldb.impl.InternalKey; +import org.iq80.leveldb.impl.InternalKeyComparator; +import org.iq80.leveldb.impl.MemTable; import org.iq80.leveldb.impl.SeekingIterator; +import org.iq80.leveldb.impl.SeekingIteratorAdapter; +import org.iq80.leveldb.impl.ValueType; +import org.iq80.leveldb.util.AbstractSeekingIterator; import org.iq80.leveldb.util.Closeables; +import org.iq80.leveldb.util.FileUtils; +import org.iq80.leveldb.util.LRUCache; +import org.iq80.leveldb.util.RandomInputFile; import org.iq80.leveldb.util.Slice; import org.iq80.leveldb.util.Slices; +import org.iq80.leveldb.util.Snappy; +import org.iq80.leveldb.util.TestUtils; +import org.iq80.leveldb.util.UnbufferedWritableFile; +import org.iq80.leveldb.util.WritableFile; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; +import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.channels.FileChannel; +import java.nio.ByteBuffer; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.Random; +import java.util.concurrent.ConcurrentSkipListMap; -import static com.google.common.base.Preconditions.checkState; import static java.util.Arrays.asList; +import static org.iq80.leveldb.util.SizeOf.SIZE_OF_INT; +import static org.iq80.leveldb.util.TestUtils.asciiToSlice; +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; import static org.testng.Assert.assertTrue; public abstract class TableTest { private File file; - private RandomAccessFile randomAccessFile; - private FileChannel fileChannel; - protected abstract Table createTable(String name, FileChannel fileChannel, Comparator<Slice> comparator, boolean verifyChecksums) + protected abstract Table createTable(File file, Comparator<Slice> comparator, boolean verifyChecksums, FilterPolicy filterPolicy) throws IOException; @Test(expectedExceptions = IllegalArgumentException.class) public void testEmptyFile() throws Exception { - createTable(file.getAbsolutePath(), fileChannel, new BytewiseComparator(), true); + createTable(file, new BytewiseComparator(), true, null); } @Test @@ -105,6 +134,812 @@ public void testMultipleEntriesWithMultipleBlock() tableTest(BlockHelper.estimateBlockSize(Integer.MAX_VALUE, entries) / 3, Integer.MAX_VALUE, entries); }
+ + @Test + public void testZeroRestartPointsInBlock() + { + Block entries = new Block(Slices.allocate(SIZE_OF_INT), new BytewiseComparator()); + + BlockIterator iterator = entries.iterator(); + iterator.seekToFirst(); + assertFalse(iterator.hasNext()); + iterator.seekToLast(); + assertFalse(iterator.hasNext()); + iterator.seek(asciiToSlice("foo")); + assertFalse(iterator.hasNext()); + }
+ + private static final class KVMap extends ConcurrentSkipListMap<Slice, Slice> + { + public KVMap(UserComparator userComparator) + { + super(new STLLessThan(userComparator)); + } + + void add(String key, Slice value) + { + put(asciiToSlice(key), value); + } + }
+ + private static class STLLessThan implements Comparator<Slice> + { + private UserComparator userComparator; + + public STLLessThan(UserComparator userComparator) + { + this.userComparator = userComparator; + } + + @Override + public int compare(Slice o1, Slice o2) + { + return userComparator.compare(o1, o2); + } + }
+ + @Test + public void testTableApproximateOffsetOfPlain() throws Exception + { + TableConstructor c = new TableConstructor(new BytewiseComparator()); + c.add("k01", "hello"); + c.add("k02", "hello2"); + c.add("k03", TestUtils.longString(10000, 'x')); + c.add("k04", TestUtils.longString(200000, 'x')); + c.add("k05", TestUtils.longString(300000, 'x')); + c.add("k06", "hello3"); + c.add("k07", TestUtils.longString(100000, 'x')); + + final Options options = new Options(); + options.blockSize(1024); + options.compressionType(CompressionType.NONE); + c.finish(options); + + assertBetween(c.approximateOffsetOf("abc"), 0, 0); + assertBetween(c.approximateOffsetOf("k01"), 0, 0); + assertBetween(c.approximateOffsetOf("k01a"), 0, 0); + assertBetween(c.approximateOffsetOf("k02"), 0, 0); + assertBetween(c.approximateOffsetOf("k03"), 0, 0); + assertBetween(c.approximateOffsetOf("k04"), 10000, 11000); + assertBetween(c.approximateOffsetOf("k04a"), 210000, 211000); + assertBetween(c.approximateOffsetOf("k05"), 210000, 211000); + assertBetween(c.approximateOffsetOf("k06"), 510000, 511000); + assertBetween(c.approximateOffsetOf("k07"), 510000, 511000); + assertBetween(c.approximateOffsetOf("xyz"), 610000, 612000); + }
+ + @Test + public void testTableApproximateOffsetOfCompressed() throws Exception + { + if (!Snappy.available()) { + System.out.println("skipping compression tests"); + return; + } + + Random rnd = new Random(301); + TableConstructor c = new TableConstructor(new BytewiseComparator()); + c.add("k01", "hello"); + c.add("k02", TestUtils.compressibleString(rnd, 0.25, 10000)); + c.add("k03", "hello3"); + c.add("k04", TestUtils.compressibleString(rnd, 0.25, 10000)); + + Options options = new Options(); + options.blockSize(1024); + options.compressionType(CompressionType.SNAPPY); + c.finish(options); + + // Expected upper and lower bounds of space used by compressible strings. + int kSlop = 1000; // Compressor effectiveness varies. + int expected = 2500; // 10000 * compression ratio (0.25) + int minZ = expected - kSlop; + int maxZ = expected + kSlop; + + assertBetween(c.approximateOffsetOf("abc"), 0, kSlop); + assertBetween(c.approximateOffsetOf("k01"), 0, kSlop); + assertBetween(c.approximateOffsetOf("k02"), 0, kSlop); + // Have now emitted a large compressible string, so adjust expected offset. + assertBetween(c.approximateOffsetOf("k03"), minZ, maxZ); + assertBetween(c.approximateOffsetOf("k04"), minZ, maxZ); + // Have now emitted two large compressible strings, so adjust expected offset.
+ assertBetween(c.approximateOffsetOf("xyz"), 2 * minZ, 2 * maxZ); + } + + static void assertBetween(long val, long low, long high) + { + assertTrue((val >= low) && (val <= high), + String.format("Value %s is not in range [%s, %s]", val, low, high)); + } + + private abstract static class Constructor implements AutoCloseable, Iterable> + { + private final KVMap kvMap; + private final UserComparator comparator; + + public Constructor(final UserComparator comparator) + { + this.comparator = comparator; + this.kvMap = new KVMap(this.comparator); + + } + + void add(Slice key, Slice value) + { + kvMap.put(key, value); + } + + void add(String key, Slice value) + { + kvMap.put(asciiToSlice(key), value); + } + + void add(String key, String value) + { + add(key, asciiToSlice(value)); + } + + public final KVMap finish(Options options) throws IOException + { + finish(options, comparator, kvMap); + return kvMap; + + } + + @Override + public void close() throws Exception + { + } + + protected abstract void finish(Options options, UserComparator comparator, KVMap kvMap) throws IOException; + + public abstract SeekingIterator iterator(); + } + + public static class TableConstructor extends Constructor + { + private Table table; + + public TableConstructor(UserComparator comparator) + { + super(comparator); + } + + @Override + protected void finish(Options options, UserComparator comp, KVMap data) throws IOException + { + StringSink sink = new StringSink(); + TableBuilder builder = new TableBuilder(options, sink, comp); + + for (Map.Entry e : data.entrySet()) { + builder.add(e.getKey(), e.getValue()); + } + builder.finish(); + sink.close(); + + assertEquals(sink.content.length, builder.getFileSize()); + + // Open the table + StringSource source = new StringSource(sink.content); + LRUCache blockCache = new LRUCache<>(options.cacheSize() > 0 ? (int) options.cacheSize() : 8 << 20, new BlockHandleSliceWeigher()); + table = new Table(source, comp, options.verifyChecksums(), blockCache, (FilterPolicy) options.filterPolicy()); + } + + public long approximateOffsetOf(String key) + { + return table.getApproximateOffsetOf(asciiToSlice(key)); + } + + @Override + public SeekingIterator iterator() + { + return table.iterator(); + } + } + + @DataProvider(name = "testArgs") + public Object[][] testArgsProvider() + { + try { + final ReverseDBComparator reverse = new ReverseDBComparator(); + return new Object[][] { + {newHarness(TableConstructor.class, null, 16)}, + {newHarness(TableConstructor.class, null, 1)}, + {newHarness(TableConstructor.class, null, 1024)}, + {newHarness(TableConstructor.class, reverse, 16)}, + {newHarness(TableConstructor.class, reverse, 1)}, + {newHarness(TableConstructor.class, reverse, 1024)}, + + {newHarness(BlockConstructor.class, null, 16)}, + {newHarness(BlockConstructor.class, null, 1)}, + {newHarness(BlockConstructor.class, null, 1014)}, + {newHarness(BlockConstructor.class, reverse, 16)}, + {newHarness(BlockConstructor.class, reverse, 1)}, + {newHarness(BlockConstructor.class, reverse, 1024)}, + + //TODO ported from original but need to be moved away. they don't exactly belong in current package! 
+ {newHarness(MemTableConstructor.class, null, 16)}, + {newHarness(MemTableConstructor.class, reverse, 16)}, + + {newHarness(DbConstructor.class, null, 16)}, + {newHarness(DbConstructor.class, reverse, 16)}, + }; + } + catch (Exception e) { + throw new RuntimeException(e); + } + }
+ + private static <T extends Constructor> Harness<T> newHarness(Class<T> cls, DBComparator dbComparator, int restartInterval) throws Exception + { + Random rnd = new Random(301 + System.nanoTime()); + return new Harness<>(rnd, dbComparator, cls, restartInterval); + }
+ + @Test(dataProvider = "testArgs") + public void testEmpty(Harness<?> harness) throws Exception + { + try { + harness.test(); + } + finally { + harness.close(); + } + }
+ + @Test(dataProvider = "testArgs") + public void testSimpleEmptyKey(Harness<?> harness) throws Exception + { + try { + harness.add(Slices.EMPTY_SLICE, asciiToSlice("v")); + harness.test(); + } + finally { + harness.close(); + } + }
+ + @Test(dataProvider = "testArgs") + public void testSimpleSingle(Harness<?> harness) throws Exception + { + try { + harness.add(asciiToSlice("abc"), asciiToSlice("v")); + harness.test(); + } + finally { + harness.close(); + } + }
+ + @Test(dataProvider = "testArgs") + public void testSimpleMulti(Harness<?> harness) throws Exception + { + try { + harness.add(asciiToSlice("abc"), asciiToSlice("v")); + harness.add(asciiToSlice("abcd"), asciiToSlice("v")); + harness.add(asciiToSlice("ac"), asciiToSlice("v2")); + harness.test(); + } + finally { + harness.close(); + } + }
+ + @Test(dataProvider = "testArgs") + public void testSimpleSpecialKey(Harness<?> harness) throws Exception + { + try { + harness.add(Slices.wrappedBuffer(new byte[] {-1, -1}), asciiToSlice("v3")); + harness.test(); + } + finally { + harness.close(); + } + }
+ + @Test(dataProvider = "testArgs") + public void testRandomized(Harness<?> harness) throws Exception + { + try { + Random rnd = harness.getRnd(); + for (int numEntries = 0; numEntries < 2000; numEntries += (numEntries < 50 ? 1 : 200)) { + if ((numEntries % 10) == 0) { + //System.err.println(String.format("case %s: numEntries = %d", harness, numEntries)); + } + for (int e = 0; e < numEntries; e++) { + harness.add(new Slice(TestUtils.randomKey(rnd, harness.getRandomSkewed(4))), + TestUtils.randomString(rnd, harness.getRandomSkewed(5))); + } + } + harness.test(); + } + finally { + harness.close(); + } + }
+ + @Test + public void testRandomizedLongDB() throws Exception + { + Random rnd = new Random(301); + try (Harness<DbConstructor> harness = new Harness<>(rnd, null, DbConstructor.class, 16)) { + int numEntries = 100000; + for (int e = 0; e < numEntries; e++) { + harness.add(new Slice(TestUtils.randomKey(rnd, harness.getRandomSkewed(4))), + TestUtils.randomString(rnd, harness.getRandomSkewed(5))); + } + harness.test(); + // We must have created enough data to force merging + int files = 0; + for (int level = 0; level < DbConstants.NUM_LEVELS; level++) { + files += Integer.parseInt(harness.constructor.db.getProperty("leveldb.num-files-at-level" + level)); + } + assertTrue(files > 0); + } + }
+ + private static class Harness<T extends Constructor> implements AutoCloseable + { + private final UserComparator comparator; + private String desc; + private final Random rnd; + private T constructor; + private Options options; + + public Harness(Random random, DBComparator comparator, Class<T> cls, int restartInterval) throws Exception + { + this.rnd = random; + this.options = new Options(); + options.blockRestartInterval(restartInterval); + options.blockSize(256); + if (comparator != null) { + this.comparator = new CustomUserComparator(comparator); + options.comparator(comparator); + } + else { + this.comparator = new BytewiseComparator(); + } + constructor = cls.getConstructor(UserComparator.class).newInstance(this.comparator); + desc = cls.getSimpleName() + ", comparator=" + (comparator == null ? null : comparator.getClass().getSimpleName()) + ", restartInterval=" + restartInterval; + }
+ + public Random getRnd() + { + return rnd; + }
+ + public T getConstructor() + { + return constructor; + }
+ + /** + * Skewed: pick "base" uniformly from range [0,maxLog] and then + * return "base" random bits. The effect is to pick a number in the + * range [0,2^maxLog-1] with exponential bias towards smaller numbers. + */ + private int getRandomSkewed(int maxLog) + { + return rnd.nextInt(Integer.MAX_VALUE) % (1 << rnd.nextInt(Integer.MAX_VALUE) % (maxLog + 1)); + }
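+ + // Worked example of the skew, assuming maxLog = 4: "base" is uniform over {0..4} and the + // result is then uniform over [0, 2^base). P(result == 0) is about 0.39 while P(result == 15) + // is only 1/80, so generated key/value lengths cluster near zero yet still cover the range.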
+ + void add(Slice key, Slice value) + { + constructor.add(key, value); + }
+ + private void testForwardScan(KVMap data) + { + SeekingIterator<Slice, Slice> iter = constructor.iterator(); + + iter.seekToFirst(); + + Iterator<Map.Entry<Slice, Slice>> iterator = data.entrySet().iterator(); + while (iter.hasNext()) { + assertEqualsEntries(iter.next(), iterator.next()); + } + assertFalse(iterator.hasNext()); + }
+ + private static void assertEqualsEntries(Map.Entry<Slice, Slice> actual, Map.Entry<Slice, Slice> expected) + { + assertEquals(actual.getKey(), expected.getKey()); + assertEquals(actual.getValue(), expected.getValue()); + }
+ + private void testRandomAccess(KVMap data) + { + SeekingIterator<Slice, Slice> iter = constructor.iterator(); + List<Slice> keys = Lists.newArrayList(data.keySet()); + + //assertFalse(iter.hasNext()); + Iterator<Map.Entry<Slice, Slice>> modelIter = data.entrySet().iterator(); + for (int i = 0; i < 200; i++) { + int toss = rnd.nextInt(5); + switch (toss) { + case 0: { + if (iter.hasNext()) { + Map.Entry<Slice, Slice> itNex = iter.next(); + Map.Entry<Slice, Slice> modelNex = modelIter.next(); + assertEqualsEntries(itNex, modelNex); + } + break; + } + + case 1: { + iter.seekToFirst(); + modelIter = data.entrySet().iterator(); + if (modelIter.hasNext()) { + Map.Entry<Slice, Slice> itNex = iter.next(); + Map.Entry<Slice, Slice> modelNex = modelIter.next(); + assertEqualsEntries(itNex, modelNex); + } + break; + } + + case 2: { + modelIter = getEntryIterator(data, iter, keys); + break; + } + + case 3: { + //TODO implement prev on all iterators; falls through to case 4 for now + } + case 4: { + //TODO implement seekToLast on all iterators + break; + } + } + } + }
+ + private Iterator<Map.Entry<Slice, Slice>> getEntryIterator(KVMap data, SeekingIterator<Slice, Slice> iter, List<Slice> keys) + { + Iterator<Map.Entry<Slice, Slice>> modelIter; + Slice key = pickRandomKey(rnd, keys); + modelIter = data.tailMap(key).entrySet().iterator(); + iter.seek(key); + if (modelIter.hasNext()) { + Map.Entry<Slice, Slice> itNex = iter.next(); + Map.Entry<Slice, Slice> modelNex = modelIter.next(); + assertEqualsEntries(itNex, modelNex); + } + return modelIter; + }
+ + Slice pickRandomKey(Random rnd, List<Slice> keys) + { + if (keys.isEmpty()) { + return asciiToSlice("foo"); + } + else { + int index = rnd.nextInt(keys.size()); + Slice result = keys.get(index).copySlice(); + switch (rnd.nextInt(3)) { + case 0: + // Return an existing key + break; + case 1: { + // Attempt to return something smaller than an existing key + int idx1 = result.length() - 1; + if (result.length() > 0 && result.getByte(idx1) > '\0') { + result.setByte(idx1, result.getByte(idx1) - 1); + } + break; + } + case 2: { + // Return something larger than an existing key + result = increment(comparator, result); + break; + } + } + return result; + } + }
+ + Slice increment(Comparator<Slice> cmp, Slice key) + { + Slice k; + if (cmp instanceof BytewiseComparator) { + k = key; + } + else { + k = reverse(key); + } + byte[] bytes = Arrays.copyOf(k.getBytes(), k.length() + 1); + bytes[k.length()] = 0; + return new Slice(bytes); + }
+ + private Slice reverse(Slice key) + { + byte[] bytes = new byte[key.length()]; + for (int i = 0, k = key.length() - 1; k >= 0; i++, k--) { + bytes[i] = key.getByte(k); + } + return new Slice(bytes); + }
+ + void test() throws IOException + { + KVMap data = constructor.finish(options); + + testForwardScan(data); + //TODO testBackwardScan(data); + testRandomAccess(data); + }
+ + @Override + public void close() throws Exception + { + constructor.close(); + }
+ + @Override + public String toString() + { + return desc; + } + }
+ + private static class BlockConstructor extends Constructor + { + private Block entries; + + public BlockConstructor(UserComparator comparator) + { + super(comparator); + }
+ + @Override + public SeekingIterator<Slice, Slice> iterator() + { + return entries.iterator(); + }
+ + @Override + protected void finish(Options options, UserComparator cmp, KVMap map) throws IOException + { + BlockBuilder builder = new BlockBuilder(256, options.blockRestartInterval(), cmp); + + for (Map.Entry<Slice, Slice> entry : map.entrySet()) { + builder.add(entry.getKey(), entry.getValue()); + } + + // Open the block + Slice data = builder.finish(); + entries = new Block(data, cmp); + } + }
+ + private static class MemTableConstructor extends Constructor + { + private MemTable table; + + public MemTableConstructor(UserComparator comparator) + { + super(comparator); + }
+ + @Override + protected void finish(Options options, UserComparator comparator, KVMap kvMap) throws IOException + { + table = new MemTable(new InternalKeyComparator(comparator)); + int seq = 1; + for (Map.Entry<Slice, Slice> e : kvMap.entrySet()) { + table.add(seq++, ValueType.VALUE, e.getKey(), e.getValue()); + } + }
+ + @Override + public SeekingIterator<Slice, Slice> iterator() + { + return new AbstractSeekingIterator<Slice, Slice>() + { + MemTable.MemTableIterator iterator = table.iterator(); + + @Override + protected void seekToFirstInternal() + { + iterator.seekToFirst(); + } + + @Override + protected void seekInternal(Slice targetKey) + { + iterator.seek(new InternalKey(targetKey, Integer.MAX_VALUE, ValueType.VALUE)); + } + + @Override + protected Map.Entry<Slice, Slice> getNextElement() + { + if (iterator.hasNext()) { + InternalEntry next = iterator.next(); + return new AbstractMap.SimpleEntry<>(next.getKey().getUserKey(), next.getValue()); + } + else { + return null; + } + } + }; + } + }
+ + private static class DbConstructor extends Constructor + { + private DbImpl db; + private File tmpDir; + + public DbConstructor(UserComparator comparator) + { + super(comparator); + }
+ + @Override + protected void finish(Options options, UserComparator comparator, KVMap kvMap) throws IOException + { + options + .createIfMissing(true) + .errorIfExists(true) + .writeBufferSize(10000); // Something small to force merging + tmpDir = FileUtils.createTempDir("leveldb"); + this.db = new DbImpl(options, tmpDir, new EnvImpl()); + for (Map.Entry<Slice, Slice> entry : kvMap.entrySet()) { + db.put(entry.getKey().getBytes(), entry.getValue().getBytes()); + } + }
+ + @Override + public SeekingIterator<Slice, Slice> iterator() + { + return new AbstractSeekingIterator<Slice, Slice>() + { + SeekingIteratorAdapter iterator = db.iterator(); + + @Override + protected void seekToFirstInternal() + { + iterator.seekToFirst(); + } + + @Override + protected void seekInternal(Slice targetKey) + { + iterator.seek(targetKey.getBytes()); + } + + @Override + protected Map.Entry<Slice, Slice> getNextElement() + { + if (iterator.hasNext()) { + SeekingIteratorAdapter.DbEntry next = iterator.next(); + return new AbstractMap.SimpleEntry<>(next.getKeySlice(), next.getValueSlice()); + } + else { + return null; + } + } + }; + }
+ + @Override + public void close() throws Exception + { + super.close(); + Closeables.closeQuietly(db); + FileUtils.deleteRecursively(tmpDir); + } + }
+ + public class ReverseDBComparator + implements DBComparator + { + private final BytewiseComparator com = new BytewiseComparator(); +
@Override + public String name() + { + return "leveldb.ReverseBytewiseComparator"; + } + + @Override + public byte[] findShortestSeparator(byte[] start, byte[] limit) + { + Slice s = reverseToSlice(start); + Slice l = reverseToSlice(limit); + return reverseB(com.findShortestSeparator(s, l).getBytes()); + } + + private Slice reverseToSlice(byte[] key) + { + return new Slice(reverseB(key)); + } + + private byte[] reverseB(byte[] key) + { + byte[] bytes = new byte[key.length]; + for (int i = 0, k = key.length - 1; k >= 0; i++, k--) { + bytes[i] = key[k]; + } + return bytes; + } + + @Override + public byte[] findShortSuccessor(byte[] key) + { + Slice s = reverseToSlice(key); + return reverseB(com.findShortSuccessor(s).getBytes()); + } + + @Override + public int compare(byte[] a, byte[] b) + { + return com.compare(reverseToSlice(a), reverseToSlice(b)); + } + } + + private static class StringSource implements RandomInputFile + { + byte[] data; + + public StringSource(byte[] data) + { + this.data = data; + } + + @Override + public long size() + { + return data.length; + } + + @Override + public ByteBuffer read(long offset, int length) + { + return Slices.wrappedBuffer(data).copySlice((int) offset, length).toByteBuffer(); + } + + @Override + public void close() + { + } + } + + private static class StringSink implements WritableFile + { + private ByteArrayOutputStream sb = new ByteArrayOutputStream(); + + byte[] content; + + @Override + public void append(Slice data) throws IOException + { + sb.write(data.getBytes()); + } + + @Override + public void force() throws IOException + { + content = sb.toByteArray(); + } + + @Override + public void close() throws IOException + { + content = sb.toByteArray(); + sb.close(); + sb = null; + } + } + private void tableTest(int blockSize, int blockRestartInterval, BlockEntry... 
entries) throws IOException { @@ -116,14 +951,14 @@ private void tableTest(int blockSize, int blockRestartInterval, List<BlockEntry> entries) { reopenFile(); Options options = new Options().blockSize(blockSize).blockRestartInterval(blockRestartInterval); - TableBuilder builder = new TableBuilder(options, fileChannel, new BytewiseComparator()); + TableBuilder builder = new TableBuilder(options, UnbufferedWritableFile.open(file), new BytewiseComparator()); for (BlockEntry entry : entries) { builder.add(entry); } builder.finish(); - Table table = createTable(file.getAbsolutePath(), fileChannel, new BytewiseComparator(), true); + Table table = createTable(file, new BytewiseComparator(), true, null); SeekingIterator<Slice, Slice> seekingIterator = table.iterator(); BlockHelper.assertSequence(seekingIterator, entries); @@ -162,7 +997,6 @@ public void setUp() throws Exception { reopenFile(); - checkState(0 == fileChannel.position(), "Expected fileChannel.position %s to be 0", fileChannel.position()); } private void reopenFile() @@ -170,16 +1004,13 @@ private void reopenFile() { file = File.createTempFile("table", ".db"); file.delete(); - randomAccessFile = new RandomAccessFile(file, "rw"); - fileChannel = randomAccessFile.getChannel(); + com.google.common.io.Files.touch(file); } @AfterMethod public void tearDown() throws Exception { - Closeables.closeQuietly(fileChannel); - Closeables.closeQuietly(randomAccessFile); file.delete(); } }
diff --git a/leveldb/src/test/java/org/iq80/leveldb/table/UnbufferedRandomInputFileTableTest.java b/leveldb/src/test/java/org/iq80/leveldb/table/UnbufferedRandomInputFileTableTest.java new file mode 100644 index 00000000..667146f9 --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/table/UnbufferedRandomInputFileTableTest.java @@ -0,0 +1,37 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.table; + +import org.iq80.leveldb.util.LRUCache; +import org.iq80.leveldb.util.Slice; +import org.iq80.leveldb.util.UnbufferedRandomInputFile; + +import java.io.File; +import java.io.IOException; +import java.util.Comparator; + +public class UnbufferedRandomInputFileTableTest + extends TableTest +{ + @Override + protected Table createTable(File file, Comparator<Slice> comparator, boolean verifyChecksums, FilterPolicy filterPolicy) + throws IOException + { + return new Table(UnbufferedRandomInputFile.open(file), comparator, verifyChecksums, new LRUCache<>(8 << 5, new BlockHandleSliceWeigher()), filterPolicy); + } +}
diff --git a/leveldb/src/test/java/org/iq80/leveldb/util/HashTest.java b/leveldb/src/test/java/org/iq80/leveldb/util/HashTest.java new file mode 100644 index 00000000..b6bc977d --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/util/HashTest.java @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + +/** + * @author Honore Vasconcelos + */ +public class HashTest +{ + @Test + public void testSignedUnsignedTrue() throws Exception + { + byte[] data1 = {0x62}; + byte[] data2 = {(byte) 0xc3, (byte) 0x97}; + byte[] data3 = {(byte) 0xe2, (byte) 0x99, (byte) 0xa5}; + byte[] data4 = {(byte) 0xe1, (byte) 0x80, (byte) 0xb9, 0x32}; + byte[] data5 = { + 0x01, (byte) 0xc0, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x04, 0x00, + 0x00, 0x00, 0x00, 0x14, + 0x00, 0x00, 0x00, 0x18, + 0x28, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + }; + assertEquals(Hash.hash(new byte[0], 0xbc9f1d34), 0xbc9f1d34); + assertEquals(Hash.hash(data1, 0xbc9f1d34), 0xef1345c4); + assertEquals(Hash.hash(data2, 0xbc9f1d34), 0x5b663814); + assertEquals(Hash.hash(data3, 0xbc9f1d34), 0x323c078f); + assertEquals(Hash.hash(data4, 0xbc9f1d34), 0xed21633a); + assertEquals(Hash.hash(data5, 0x12345678), 0xf333dabb); + } +}
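Reviewer note: vectors with bytes at or above 0x80 (data2 through data5) are exactly where a Java port of the C++ hash usually breaks, because `byte` sign-extends when widened to `int`. A minimal sketch of the pitfall (standalone, not part of the patch; class name hypothetical):

```java
// Demonstrates why hashing raw Java bytes needs the "& 0xff" idiom to match
// the unsigned byte arithmetic of the original C++ implementation.
public class SignExtensionDemo
{
    public static void main(String[] args)
    {
        byte b = (byte) 0xc3;  // -61 as a signed Java byte
        int wrong = b;         // sign-extends to 0xffffffc3
        int right = b & 0xff;  // 0x000000c3, the value the C++ code hashes
        System.out.printf("wrong=%08x right=%08x%n", wrong, right);
    }
}
```
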
diff --git a/leveldb/src/test/java/org/iq80/leveldb/util/LRUCacheTest.java b/leveldb/src/test/java/org/iq80/leveldb/util/LRUCacheTest.java new file mode 100644 index 00000000..eddf6f7b --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/util/LRUCacheTest.java @@ -0,0 +1,120 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import com.google.common.cache.CacheLoader; +import com.google.common.cache.Weigher; +import org.testng.annotations.Test; + +import java.util.concurrent.ExecutionException; + +import static org.testng.Assert.assertEquals; + +/** + * @author Honore Vasconcelos + */ +public class LRUCacheTest +{ + @Test + public void testMultipleClientWithSameKey() throws Exception + { + final LRUCache<Integer, Integer> cache = new LRUCache<>(2 * 5, new CountWeigher()); + final CacheWithStatistics[] caches = CacheWithStatistics.withStatistics(cache, 2); + + for (int x = 0; x < 3; ++x) { + for (int i = 0; i < caches.length; ++i) { + for (int j = 0; j < 5; ++j) { + assertEquals(((int) caches[i].load(j)), j * (i + 1) * 3); + } + } + // only the first pass should load entries into the cache; each client + // should therefore execute 5 loads instead of 15 + for (CacheWithStatistics cache1 : caches) { + assertEquals(cache1.count, 5); + } + } + }
+ + @Test + public void testLimitIsRespected() throws Exception + { + // the size limit is enforced by Guava, but we could still have some type of bug :) + final LRUCache<Integer, Integer> cache = new LRUCache<>(2, new CountWeigher()); + final CacheWithStatistics[] caches = CacheWithStatistics.withStatistics(cache, 2); + caches[0].load(0); + caches[0].load(1); + caches[0].load(2); + caches[0].load(1); + caches[0].load(0); + + assertEquals(caches[0].count, 4); + assertEquals(caches[1].count, 0); + + caches[1].load(0); + caches[0].load(0); + assertEquals(caches[0].count, 4); + assertEquals(caches[1].count, 1); + + caches[0].load(2); + caches[1].load(1); + assertEquals(caches[0].count, 5); + assertEquals(caches[1].count, 2); + }
+ + private static class CacheWithStatistics implements LRUCache.LRUSubCache<Integer, Integer> + { + private final LRUCache.LRUSubCache<Integer, Integer> cache; + private int count; + + private CacheWithStatistics(LRUCache<Integer, Integer> cache, final int i) + { + this.cache = cache.subCache(new CacheLoader<Integer, Integer>() + { + @Override + public Integer load(Integer key) + { + count++; + return key * (i + 1) * 3; + } + }); + }
+ + static CacheWithStatistics[] withStatistics(LRUCache<Integer, Integer> cache, int clients) + { + final CacheWithStatistics[] caches = new CacheWithStatistics[clients]; + for (int i = 0; i < clients; ++i) { + caches[i] = new CacheWithStatistics(cache, i); + } + return caches; + }
+ + @Override + public Integer load(Integer key) throws ExecutionException + { + return cache.load(key); + } + }
+ + private static class CountWeigher implements Weigher<Integer, Integer> + { + @Override + public int weigh(Integer key, Integer value) + { + return -31; // hack to simplify the unit test + } + } +}
diff --git a/leveldb/src/test/java/org/iq80/leveldb/util/SequentialFileImplTest.java b/leveldb/src/test/java/org/iq80/leveldb/util/SequentialFileImplTest.java new file mode 100644 index 00000000..0454e18a --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/util/SequentialFileImplTest.java @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import org.testng.annotations.AfterMethod; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; + +import java.io.File; +import java.io.FileOutputStream; + +import static org.testng.AssertJUnit.assertEquals; + +public class SequentialFileImplTest +{ + File file; + + @BeforeMethod + public void setUp() throws Exception + { + file = File.createTempFile("test", ".log"); + }
+ + @Test + public void testCheckReadBounds() throws Exception + { + try (FileOutputStream f = new FileOutputStream(file)) { + for (int i = 0; i < 200; ++i) { + f.write(i); + } + } + try (SequentialFile open = SequentialFileImpl.open(file)) { + DynamicSliceOutput destination = new DynamicSliceOutput(10); + assertEquals(10, open.read(10, destination)); + Slice slice = destination.slice(); + assertEquals(new Slice(new byte[] {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}), slice); + byte[] bytes = new byte[190]; + for (int i = 10, k = 0; i < 200; ++i, k++) { + bytes[k] = (byte) i; + } + DynamicSliceOutput destination1 = new DynamicSliceOutput(10); + assertEquals(190, open.read(200, destination1)); + Slice slice1 = destination1.slice(); + assertEquals(new Slice(bytes), slice1); + assertEquals(-1, open.read(10, new DynamicSliceOutput(10))); // EOF + assertEquals(0, open.read(0, new DynamicSliceOutput(10))); // zero-length read, not EOF + } + }
+ + @AfterMethod + public void tearDown() + { + file.delete(); + } +}
diff --git a/leveldb/src/test/java/org/iq80/leveldb/util/TestUtils.java b/leveldb/src/test/java/org/iq80/leveldb/util/TestUtils.java new file mode 100644 index 00000000..2c33b80c --- /dev/null +++ b/leveldb/src/test/java/org/iq80/leveldb/util/TestUtils.java @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2011 the original author or authors. + * See the notice.md file distributed with this work for additional + * information regarding copyright ownership. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.iq80.leveldb.util; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.Random; + +import static java.nio.charset.StandardCharsets.US_ASCII; + +public final class TestUtils +{ + private TestUtils() + { + //utility + }
+ + public static Slice randomString(Random rnd, int len) + { + final byte[] bytes = new byte[len]; + for (int i = 0; i < len; i++) { + bytes[i] = (byte) (' ' + rnd.nextInt(95)); // ' ' .. '~' + } + return new Slice(bytes); + }
+ + public static byte[] randomKey(Random rnd, int len) + { + // Make sure to generate a wide variety of characters so we + // test the boundary conditions for short-key optimizations.
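+ // (0x00 and 0x01 exercise findShortestSeparator's lower boundary, while 0xfd-0xff + // exercise the overflow path in findShortSuccessor - both easy to get wrong in a port.) +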
+ byte[] kTestChars = { + 0, 1, 'a', 'b', 'c', 'd', 'e', (byte) 0xfd, (byte) 0xfe, (byte) 0xff + }; + byte[] result = new byte[len]; + for (int i = 0; i < len; i++) { + result[i] = kTestChars[rnd.nextInt(kTestChars.length)]; + } + return result; + } + + public static Slice compressibleString(Random rnd, double compressedFraction, int len) throws IOException + { + int raw = (int) (len * compressedFraction); + if (raw < 1) { + raw = 1; + } + final byte[] bytes = randomString(rnd, raw).getBytes(); + + final ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream(len); + while (byteOutputStream.size() < len) { + byteOutputStream.write(bytes); + } + final Slice slice = new Slice(byteOutputStream.toByteArray()); + byteOutputStream.close(); + return slice; + } + + public static String longString(int length, char character) + { + char[] chars = new char[length]; + Arrays.fill(chars, character); + return new String(chars); + } + + public static Slice asciiToSlice(String value) + { + return Slices.copiedBuffer(value, US_ASCII); + } + + public static byte[] asciiToBytes(String value) + { + return asciiToSlice(value).getBytes(); + } +}
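A closing usage note on `compressibleString`: it builds a `len`-byte value by repeating a `len × compressedFraction` random prefix, which is why the compressed-offset test above can predict roughly 2,500 bytes on disk for a 10,000-byte value. A minimal sketch exercising that structure (class name hypothetical; the `TestUtils`/`Slice` calls are the ones defined above):

```java
import org.iq80.leveldb.util.Slice;
import org.iq80.leveldb.util.TestUtils;

import java.io.IOException;
import java.util.Random;

public class CompressibleStringDemo
{
    public static void main(String[] args) throws IOException
    {
        Random rnd = new Random(301);
        // 10000-byte value built by repeating a 2500-byte random prefix
        Slice value = TestUtils.compressibleString(rnd, 0.25, 10000);
        byte[] bytes = value.getBytes();
        for (int i = 2500; i < bytes.length; i++) {
            assert bytes[i] == bytes[i - 2500]; // the repetition Snappy exploits
        }
        System.out.println("length = " + bytes.length);
    }
}
```
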