forked from opensearch-project/opensearch-spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' into support-on-demand-incremental-refresh-rebased
- Loading branch information
Showing
20 changed files
with
1,212 additions
and
90 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
flint-core/src/main/java/org/opensearch/flint/core/field/bloomfilter/BloomFilter.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.flint.core.field.bloomfilter; | ||
|
||
import java.io.IOException; | ||
import java.io.OutputStream; | ||
|
||
/**
 * Bloom filter interface inspired by {@code org.apache.spark.util.sketch.BloomFilter}, adapted
 * for Flint index use with the unnecessary API removed.
 */
public interface BloomFilter {

  /**
   * Bloom filter binary format version.
   */
  enum Version {
    V1(1);

    private final int versionNumber;

    Version(int versionNumber) {
      this.versionNumber = versionNumber;
    }

    /**
     * @return the numeric identifier associated with this format version
     */
    public int getVersionNumber() {
      return versionNumber;
    }
  }

  /**
   * @return the number of bits in the underlying bit array.
   */
  long bitSize();

  /**
   * Put an item into this bloom filter.
   *
   * @param item Long value item to insert
   * @return true if any bit changed, which means the item was definitely added for the first
   * time. Otherwise, the item may or may not have been added before.
   */
  boolean put(long item);

  /**
   * Merge this bloom filter with another bloom filter.
   *
   * @param bloomFilter bloom filter to merge
   * @return the bloom filter after merging
   */
  BloomFilter merge(BloomFilter bloomFilter);

  /**
   * Check whether an item may have been put into this bloom filter.
   *
   * @param item Long value item to check
   * @return true if the item may exist in this bloom filter; false if it definitely does not
   * exist.
   */
  boolean mightContain(long item);

  /**
   * Serialize this bloom filter and write it to an output stream.
   *
   * @param out output stream to write
   * @throws IOException if writing to the output stream fails
   */
  void writeTo(OutputStream out) throws IOException;
}
149 changes: 149 additions & 0 deletions
149
flint-core/src/main/java/org/opensearch/flint/core/field/bloomfilter/classic/BitArray.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
/* | ||
* This file contains code from the Apache Spark project (original license below). | ||
* It contains modifications, which are licensed as above: | ||
*/ | ||
|
||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.opensearch.flint.core.field.bloomfilter.classic; | ||
|
||
import java.io.DataInputStream; | ||
import java.io.DataOutputStream; | ||
import java.io.IOException; | ||
import java.util.Arrays; | ||
|
||
/**
 * Fixed-size bit array backed by a {@code long[]}, with {@link Long#SIZE} bits per word.
 */
class BitArray {
  private final long[] data;

  // Number of set bits. Kept current by the constructor, set() and putAll(); nothing in this
  // class reads it back.
  private long bitCount;

  /**
   * Create an all-zero bit array large enough to hold the given number of bits.
   *
   * @param numBits requested capacity in bits; rounded up to a whole number of 64-bit words
   * @throws IllegalArgumentException if numBits is not positive or needs more words than an
   *                                  int-indexed array can hold
   */
  BitArray(long numBits) {
    this(new long[numWords(numBits)]);
  }

  /**
   * Wrap an existing word array. The array is used directly, not copied.
   *
   * @param data backing words
   */
  BitArray(long[] data) {
    this.data = data;
    long bitCount = 0;
    for (long word : data) {
      bitCount += Long.bitCount(word);
    }
    this.bitCount = bitCount;
  }

  /**
   * @return array length in bits
   */
  long bitSize() {
    return (long) data.length * Long.SIZE;
  }

  /**
   * Test a single bit. The index is not range-checked here; callers are expected to pass a value
   * in {@code [0, bitSize())}.
   *
   * @param index bit index
   * @return whether the bit at the given index is set
   */
  boolean get(long index) {
    // index >>> 6 selects the word; a long shift only uses the low 6 bits of its distance,
    // so 1L << index selects the bit within that word.
    return (data[(int) (index >>> 6)] & (1L << index)) != 0;
  }

  /**
   * Set the bit at the given index. The index is not range-checked here; callers are expected to
   * pass a value in {@code [0, bitSize())}.
   *
   * @param index bit index
   * @return true if the bit was previously clear and is now set, false if it was already set
   */
  boolean set(long index) {
    if (get(index)) {
      return false;
    }
    data[(int) (index >>> 6)] |= (1L << index);
    bitCount++;
    return true;
  }

  /**
   * Bitwise-OR another bit array into this one.
   *
   * @param array other bit array, which must have the same number of words as this one
   */
  void putAll(BitArray array) {
    assert data.length == array.data.length : "BitArrays must be of equal length when merging";
    long bitCount = 0;
    for (int i = 0; i < data.length; i++) {
      data[i] |= array.data[i];
      bitCount += Long.bitCount(data[i]);
    }
    this.bitCount = bitCount;
  }

  /**
   * Serialize and write out this bit array to the given output stream: the word count as an int,
   * followed by each word as a long.
   *
   * @param out output stream
   * @throws IOException if writing to the stream fails
   */
  void writeTo(DataOutputStream out) throws IOException {
    out.writeInt(data.length);
    for (long datum : data) {
      out.writeLong(datum);
    }
  }

  /**
   * Deserialize and read a bit array from the given input stream, expecting the layout produced
   * by {@link #writeTo(DataOutputStream)}.
   *
   * @param in input stream
   * @return bit array
   * @throws IOException if reading fails or the stream declares a negative word count
   */
  static BitArray readFrom(DataInputStream in) throws IOException {
    int numWords = in.readInt();
    if (numWords < 0) {
      // A negative length can only come from a corrupt or foreign stream; report it as an I/O
      // problem instead of letting the array allocation throw NegativeArraySizeException.
      throw new IOException("Invalid bit array length in stream: " + numWords);
    }
    long[] data = new long[numWords];
    for (int i = 0; i < numWords; i++) {
      data[i] = in.readLong();
    }
    return new BitArray(data);
  }

  /**
   * Compute how many 64-bit words are needed to hold the given number of bits.
   *
   * @param numBits number of bits, must be positive
   * @return number of words, rounded up
   * @throws IllegalArgumentException if numBits is not positive or the word count exceeds
   *                                  {@link Integer#MAX_VALUE}
   */
  private static int numWords(long numBits) {
    if (numBits <= 0) {
      throw new IllegalArgumentException("numBits must be positive, but got " + numBits);
    }
    // Ceiling division in integer arithmetic; numBits >= 1 here, so this cannot overflow.
    long numWords = (numBits - 1) / Long.SIZE + 1;
    if (numWords > Integer.MAX_VALUE) {
      throw new IllegalArgumentException("Can't allocate enough space for " + numBits + " bits");
    }
    return (int) numWords;
  }

  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }
    if (!(other instanceof BitArray)) {
      return false;
    }
    BitArray that = (BitArray) other;
    return Arrays.equals(data, that.data);
  }

  @Override
  public int hashCode() {
    return Arrays.hashCode(data);
  }
}
Oops, something went wrong.