diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst index 3f0ff6cdd0103..275702968622b 100644 --- a/docs/source/developers/java/development.rst +++ b/docs/source/developers/java/development.rst @@ -110,30 +110,58 @@ integration tests, you would do: Code Style ========== -Java code style is enforced with Checkstyle. The configuration is located at `checkstyle`_. -You can also just check the style without building the project. -This checks the code style of all source code under the current directory or from within an individual module. +The current Java code styles are configured as follows: -.. code-block:: +- Indent: Tabs & spaces (2 spaces per tab) +- Google Java Format: Reformats Java source code to comply with `Google Java Style`_. +- Configure license headers for Java & XML files - $ mvn checkstyle:check -Maven ``pom.xml`` style is enforced with Spotless using `Apache Maven pom.xml guidelines`_ -You can also just check the style without building the project. -This checks the style of all pom.xml files under the current directory or from within an individual module. +Java code style is checked by `Spotless`_ during the build, and the continuous integration build will verify +that changes adhere to the style guide. -.. code-block:: +Automatically fixing code style issues +-------------------------------------- + +- You can also just check the style without building the project with `mvn spotless:check`. +- The Java code style can be corrected from the command line by using the following commands: `mvn spotless:apply`. + +.. code-block:: bash - $ mvn spotless:check + The following files had format violations: + src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java + @@ -15,7 +15,6 @@ + ·*·limitations·under·the·License. + ·*/ -This applies the style to all pom.xml files under the current directory or from within an individual module. 
+ - + package·org.apache.arrow.algorithm.rank; + + import·java.util.stream.IntStream; + Run 'mvn spotless:apply' to fix these violations. + +Code Formatter for Intellij IDEA and Eclipse +-------------------------------------------- + +Follow the instructions for: + +- `Eclipse`_ +- `IntelliJ`_ + +Code style enforced with Checkstyle for most of the modules. The configuration is located at `checkstyle`_. +You can also just check the style without building the project. +This checks the code style of all source code under the current directory or from within an individual module. +Checkstyle will be removed once Spotless is fully integrated. .. code-block:: - $ mvn spotless:apply + $ mvn checkstyle:check .. _benchmark: https://github.com/ursacomputing/benchmarks .. _archery: https://github.com/apache/arrow/blob/main/dev/conbench_envs/README.md#L188 .. _conbench: https://github.com/conbench/conbench .. _checkstyle: https://github.com/apache/arrow/blob/main/java/dev/checkstyle/checkstyle.xml -.. _Apache Maven pom.xml guidelines: https://maven.apache.org/developers/conventions/code.html#pom-code-convention +.. _Spotless: https://github.com/diffplug/spotless +.. _Google Java Style: https://google.github.io/styleguide/javaguide.html +.. _Eclipse: https://github.com/google/google-java-format?tab=readme-ov-file#eclipse +.. _IntelliJ: https://github.com/google/google-java-format?tab=readme-ov-file#intellij-android-studio-and-other-jetbrains-ides \ No newline at end of file diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 0854da48b718a..6a9326b032dea 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -20,6 +20,10 @@ Arrow Algorithms (Experimental/Contrib) A collection of algorithms for working with ValueVectors. 
+ + 2.30.0 + + org.apache.arrow @@ -48,5 +52,87 @@ - + + + + org.apache.maven.plugins + maven-checkstyle-plugin + 3.1.0 + + true + + + + + + + + spotless + + true + + + + + com.diffplug.spotless + spotless-maven-plugin + ${spotless.version} + + + + + + pom.xml + + + ${maven.multiModuleProjectDirectory}/java/spotless/asf-xml.license + (<configuration|<project) + + + + + + **/*.java + + + ${maven.multiModuleProjectDirectory}/java/spotless/asf-java.license + package + + + + + + 1.17.0 + + + + + + true + 2 + + + true + 2 + + + false + + + + + + spotless-check + + apply + check + + validate + + + + + + + diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java index 8811e43d3d08d..e9364b2a85b7b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/DeduplicationUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import org.apache.arrow.memory.ArrowBuf; @@ -26,18 +25,18 @@ import org.apache.arrow.vector.compare.RangeEqualsVisitor; import org.apache.arrow.vector.util.DataSizeRoundingUtil; -/** - * Utilities for vector deduplication. - */ +/** Utilities for vector deduplication. */ class DeduplicationUtils { /** * Gets the start positions of the first distinct values in a vector. + * * @param vector the target vector. * @param runStarts the bit set to hold the start positions. * @param vector type. 
*/ - public static void populateRunStartIndicators(V vector, ArrowBuf runStarts) { + public static void populateRunStartIndicators( + V vector, ArrowBuf runStarts) { int bufSize = DataSizeRoundingUtil.divideBy8Ceil(vector.getValueCount()); Preconditions.checkArgument(runStarts.capacity() >= bufSize); runStarts.setZero(0, bufSize); @@ -55,6 +54,7 @@ public static void populateRunStartIndicators(V vector, /** * Gets the run lengths, given the start positions. + * * @param runStarts the bit set for start positions. * @param runLengths the run length vector to populate. * @param valueCount the number of values in the bit set. @@ -76,15 +76,15 @@ public static void populateRunLengths(ArrowBuf runStarts, IntVector runLengths, } /** - * Gets distinct values from the input vector by removing adjacent - * duplicated values. + * Gets distinct values from the input vector by removing adjacent duplicated values. + * * @param indicators the bit set containing the start positions of distinct values. * @param inputVector the input vector. * @param outputVector the output vector. * @param vector type. */ public static void populateDeduplicatedValues( - ArrowBuf indicators, V inputVector, V outputVector) { + ArrowBuf indicators, V inputVector, V outputVector) { int dstIdx = 0; for (int srcIdx = 0; srcIdx < inputVector.getValueCount(); srcIdx++) { if (BitVectorHelper.get(indicators, srcIdx) != 0) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java index 5ef03cbe4a734..4e49de14f5956 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/deduplicate/VectorRunDeduplicator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.deduplicate; import org.apache.arrow.memory.ArrowBuf; @@ -26,29 +25,28 @@ import org.apache.arrow.vector.util.DataSizeRoundingUtil; /** - * Remove adjacent equal elements from a vector. - * If the vector is sorted, it removes all duplicated values in the vector. + * Remove adjacent equal elements from a vector. If the vector is sorted, it removes all duplicated + * values in the vector. + * * @param vector type. */ public class VectorRunDeduplicator implements AutoCloseable { /** - * Bit set for distinct values. - * If the value at some index is not equal to the previous value, - * its bit is set to 1, otherwise its bit is set to 0. + * Bit set for distinct values. If the value at some index is not equal to the previous value, its + * bit is set to 1, otherwise its bit is set to 0. */ private ArrowBuf distinctValueBuffer; - /** - * The vector to deduplicate. - */ + /** The vector to deduplicate. */ private final V vector; private final BufferAllocator allocator; /** * Constructs a vector run deduplicator for a given vector. - * @param vector the vector to deduplicate. Ownership is NOT taken. + * + * @param vector the vector to deduplicate. Ownership is NOT taken. * @param allocator the allocator used for allocating buffers for start indices. */ public VectorRunDeduplicator(V vector, BufferAllocator allocator) { @@ -65,17 +63,20 @@ private void createDistinctValueBuffer() { /** * Gets the number of values which are different from their predecessor. + * * @return the run count. */ public int getRunCount() { if (distinctValueBuffer == null) { createDistinctValueBuffer(); } - return vector.getValueCount() - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount()); + return vector.getValueCount() + - BitVectorHelper.getNullCount(distinctValueBuffer, vector.getValueCount()); } /** * Gets the vector with deduplicated adjacent values removed. + * * @param outVector the output vector. 
*/ public void populateDeduplicatedValues(V outVector) { @@ -88,6 +89,7 @@ public void populateDeduplicatedValues(V outVector) { /** * Gets the length of each distinct value. + * * @param lengthVector the vector for holding length values. */ public void populateRunLengths(IntVector lengthVector) { @@ -95,7 +97,8 @@ public void populateRunLengths(IntVector lengthVector) { createDistinctValueBuffer(); } - DeduplicationUtils.populateRunLengths(distinctValueBuffer, lengthVector, vector.getValueCount()); + DeduplicationUtils.populateRunLengths( + distinctValueBuffer, lengthVector, vector.getValueCount()); } @Override diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java index 398368d1fc612..88c4e4dc65450 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryBuilder.java @@ -14,33 +14,31 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.ValueVector; /** - * A dictionary builder is intended for the scenario frequently encountered in practice: - * the dictionary is not known a priori, so it is generated dynamically. - * In particular, when a new value arrives, it is tested to check if it is already - * in the dictionary. If so, it is simply neglected, otherwise, it is added to the dictionary. - *

- * The dictionary builder is intended to build a single dictionary. - * So it cannot be used for different dictionaries. - *

+ * A dictionary builder is intended for the scenario frequently encountered in practice: the + * dictionary is not known a priori, so it is generated dynamically. In particular, when a new value + * arrives, it is tested to check if it is already in the dictionary. If so, it is simply neglected, + * otherwise, it is added to the dictionary. + * + *

The dictionary builder is intended to build a single dictionary. So it cannot be used for + * different dictionaries. + * *

Below gives the sample code for using the dictionary builder + * *

{@code
  * DictionaryBuilder dictionaryBuilder = ...
  * ...
  * dictionaryBuilder.addValue(newValue);
  * ...
  * }
- *

- *

- * With the above code, the dictionary vector will be populated, - * and it can be retrieved by the {@link DictionaryBuilder#getDictionary()} method. - * After that, dictionary encoding can proceed with the populated dictionary.. - *

+ * + *

With the above code, the dictionary vector will be populated, and it can be retrieved by the + * {@link DictionaryBuilder#getDictionary()} method. After that, dictionary encoding can proceed + * with the populated dictionary.. * * @param the dictionary vector type. */ @@ -58,7 +56,7 @@ public interface DictionaryBuilder { * Try to add an element from the target vector to the dictionary. * * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. + * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. */ int addValue(V targetVector, int targetIndex); diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java index cda7b3bf9540e..16e27c3a23e72 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/DictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.BaseIntVector; @@ -22,8 +21,9 @@ /** * A dictionary encoder translates one vector into another one based on a dictionary vector. - * According to Arrow specification, the encoded vector must be an integer based vector, which - * is the index of the original vector element in the dictionary. + * According to Arrow specification, the encoded vector must be an integer based vector, which is + * the index of the original vector element in the dictionary. + * * @param type of the encoded vector. * @param type of the vector to encode. It is also the type of the dictionary vector. 
*/ @@ -31,9 +31,10 @@ public interface DictionaryEncoder the dictionary vector type. */ -public class HashTableBasedDictionaryBuilder implements DictionaryBuilder { +public class HashTableBasedDictionaryBuilder + implements DictionaryBuilder { - /** - * The dictionary to be built. - */ + /** The dictionary to be built. */ private final V dictionary; - /** - * If null should be encoded. - */ + /** If null should be encoded. */ private final boolean encodeNull; /** - * The hash map for distinct dictionary entries. - * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary. + * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, + * whereas the value is the index in the dictionary. */ private HashMap hashMap = new HashMap<>(); - /** - * The hasher used for calculating the hash code. - */ + /** The hasher used for calculating the hash code. */ private final ArrowBufHasher hasher; - /** - * Next pointer to try to add to the hash table. - */ + /** Next pointer to try to add to the hash table. */ private ArrowBufPointer nextPointer; /** @@ -83,7 +73,7 @@ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull) { * * @param dictionary the dictionary to populate. * @param encodeNull if null values should be added to the dictionary. - * @param hasher the hasher used to compute the hash code. + * @param hasher the hasher used to compute the hash code. */ public HashTableBasedDictionaryBuilder(V dictionary, boolean encodeNull, ArrowBufHasher hasher) { this.dictionary = dictionary; @@ -125,7 +115,7 @@ public int addValues(V targetVector) { * Try to add an element from the target vector to the dictionary. * * @param targetVector the target vector containing new element. - * @param targetIndex the index of the new element in the target vector. + * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. 
*/ @Override diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java index bea1a784c3d6a..ac7a7d32bf597 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/HashTableDictionaryEncoder.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import java.util.HashMap; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.hash.ArrowBufHasher; import org.apache.arrow.memory.util.hash.SimpleHasher; @@ -27,43 +25,35 @@ /** * Dictionary encoder based on hash table. + * * @param encoded vector type. * @param decoded vector type, which is also the dictionary type. */ public class HashTableDictionaryEncoder implements DictionaryEncoder { - /** - * The dictionary for encoding/decoding. - * It must be sorted. - */ + /** The dictionary for encoding/decoding. It must be sorted. */ private final D dictionary; - /** - * The hasher used to compute the hash code. - */ + /** The hasher used to compute the hash code. */ private final ArrowBufHasher hasher; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; /** - * The hash map for distinct dictionary entries. - * The key is the pointer to the dictionary element, whereas the value is the index in the dictionary. + * The hash map for distinct dictionary entries. The key is the pointer to the dictionary element, + * whereas the value is the index in the dictionary. */ private HashMap hashMap = new HashMap<>(); - /** - * The pointer used to probe each element to encode. 
- */ + /** The pointer used to probe each element to encode. */ private ArrowBufPointer reusablePointer; /** * Constructs a dictionary encoder. - * @param dictionary the dictionary. * + * @param dictionary the dictionary. */ public HashTableDictionaryEncoder(D dictionary) { this(dictionary, false); @@ -71,20 +61,17 @@ public HashTableDictionaryEncoder(D dictionary) { /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding/decoding. - *

  • - * For encoding, when a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. - *
  • - *
  • - * For decoding, when a null is encountered in the input, - * 1) If the flag is set to true, the decoder should never expect a null in the input. - * 2) If set to false, the decoder simply produces a null in the output. - *
  • + * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for + * processing null values in the input during encoding/decoding. + *
  • For encoding, when a null is encountered in the input, 1) If the flag is set to true, + * the encoder searches for the value in the dictionary, and outputs the index in the + * dictionary. 2) If the flag is set to false, the encoder simply produces a null in the + * output. + *
  • For decoding, when a null is encountered in the input, 1) If the flag is set to true, + * the decoder should never expect a null in the input. 2) If set to false, the decoder + * simply produces a null in the output. */ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) { this(dictionary, encodeNull, SimpleHasher.INSTANCE); @@ -92,13 +79,13 @@ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull) { /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding. - * When a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. + * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for + * processing null values in the input during encoding. When a null is encountered in the + * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, + * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply + * produces a null in the output. * @param hasher the hasher used to calculate the hash code. */ public HashTableDictionaryEncoder(D dictionary, boolean encodeNull, ArrowBufHasher hasher) { @@ -120,12 +107,12 @@ private void buildHashMap() { } /** - * Encodes an input vector by a hash table. - * So the algorithm takes O(n) time, where n is the length of the input vector. + * Encodes an input vector by a hash table. So the algorithm takes O(n) time, where n is the + * length of the input vector. * - * @param input the input vector. + * @param input the input vector. * @param output the output vector. 
- **/ + */ @Override public void encode(D input, E output) { for (int i = 0; i < input.getValueCount(); i++) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java index 84a3a96af8ef1..9aeff22005751 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/LinearDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import org.apache.arrow.vector.BaseIntVector; @@ -24,20 +23,17 @@ /** * Dictionary encoder based on linear search. + * * @param encoded vector type. * @param decoded vector type, which is also the dictionary type. */ public class LinearDictionaryEncoder implements DictionaryEncoder { - /** - * The dictionary for encoding. - */ + /** The dictionary for encoding. */ private final D dictionary; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. */ private final boolean encodeNull; private RangeEqualsVisitor equalizer; @@ -46,8 +42,10 @@ public class LinearDictionaryEncoder encoded vector type. * @param decoded vector type, which is also the dictionary type. */ public class SearchDictionaryEncoder implements DictionaryEncoder { - /** - * The dictionary for encoding/decoding. - * It must be sorted. - */ + /** The dictionary for encoding/decoding. It must be sorted. */ private final D dictionary; - /** - * The criteria by which the dictionary is sorted. - */ + /** The criteria by which the dictionary is sorted. */ private final VectorValueComparator comparator; - /** - * A flag indicating if null should be encoded. - */ + /** A flag indicating if null should be encoded. 
*/ private final boolean encodeNull; /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. It must be in sorted order. * @param comparator the criteria for sorting. */ @@ -57,28 +51,29 @@ public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator /** * Constructs a dictionary encoder. + * * @param dictionary the dictionary. It must be in sorted order. * @param comparator the criteria for sorting. - * @param encodeNull a flag indicating if null should be encoded. - * It determines the behaviors for processing null values in the input during encoding. - * When a null is encountered in the input, - * 1) If the flag is set to true, the encoder searches for the value in the dictionary, - * and outputs the index in the dictionary. - * 2) If the flag is set to false, the encoder simply produces a null in the output. + * @param encodeNull a flag indicating if null should be encoded. It determines the behaviors for + * processing null values in the input during encoding. When a null is encountered in the + * input, 1) If the flag is set to true, the encoder searches for the value in the dictionary, + * and outputs the index in the dictionary. 2) If the flag is set to false, the encoder simply + * produces a null in the output. */ - public SearchDictionaryEncoder(D dictionary, VectorValueComparator comparator, boolean encodeNull) { + public SearchDictionaryEncoder( + D dictionary, VectorValueComparator comparator, boolean encodeNull) { this.dictionary = dictionary; this.comparator = comparator; this.encodeNull = encodeNull; } /** - * Encodes an input vector by binary search. - * So the algorithm takes O(n * log(m)) time, where n is the length of the input vector, - * and m is the length of the dictionary. + * Encodes an input vector by binary search. So the algorithm takes O(n * log(m)) time, where n is + * the length of the input vector, and m is the length of the dictionary. + * * @param input the input vector. 
- * @param output the output vector. Note that it must be in a fresh state. At least, - * all its validity bits should be clear. + * @param output the output vector. Note that it must be in a fresh state. At least, all its + * validity bits should be clear. */ @Override public void encode(D input, E output) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java index f9cd77daa2e76..fca7df067dcff 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/dictionary/SearchTreeBasedDictionaryBuilder.java @@ -14,45 +14,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import java.util.TreeSet; - import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; /** - * This class builds the dictionary based on a binary search tree. - * Each add operation can be finished in O(log(n)) time, - * where n is the current dictionary size. + * This class builds the dictionary based on a binary search tree. Each add operation can be + * finished in O(log(n)) time, where n is the current dictionary size. * * @param the dictionary vector type. */ -public class SearchTreeBasedDictionaryBuilder implements DictionaryBuilder { +public class SearchTreeBasedDictionaryBuilder + implements DictionaryBuilder { - /** - * The dictionary to be built. - */ + /** The dictionary to be built. */ private final V dictionary; - /** - * The criteria for sorting in the search tree. - */ + /** The criteria for sorting in the search tree. */ protected final VectorValueComparator comparator; - /** - * If null should be encoded. - */ + /** If null should be encoded. 
*/ private final boolean encodeNull; - /** - * The search tree for storing the value index. - */ + /** The search tree for storing the value index. */ private TreeSet searchTree; /** * Construct a search tree-based dictionary builder. + * * @param dictionary the dictionary vector. * @param comparator the criteria for value equality. */ @@ -62,11 +53,13 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c /** * Construct a search tree-based dictionary builder. + * * @param dictionary the dictionary vector. * @param comparator the criteria for value equality. * @param encodeNull if null values should be added to the dictionary. */ - public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator comparator, boolean encodeNull) { + public SearchTreeBasedDictionaryBuilder( + V dictionary, VectorValueComparator comparator, boolean encodeNull) { this.dictionary = dictionary; this.comparator = comparator; this.encodeNull = encodeNull; @@ -76,11 +69,10 @@ public SearchTreeBasedDictionaryBuilder(V dictionary, VectorValueComparator c } /** - * Gets the dictionary built. - * Please note that the dictionary is not in sorted order. - * Instead, its order is determined by the order of element insertion. - * To get the dictionary in sorted order, please use - * {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}. + * Gets the dictionary built. Please note that the dictionary is not in sorted order. Instead, its + * order is determined by the order of element insertion. To get the dictionary in sorted order, + * please use {@link SearchTreeBasedDictionaryBuilder#populateSortedDictionary(ValueVector)}. + * * @return the dictionary. */ @Override @@ -90,6 +82,7 @@ public V getDictionary() { /** * Try to add all values from the target vector to the dictionary. + * * @param targetVector the target vector containing values to probe. * @return the number of values actually added to the dictionary. 
*/ @@ -107,6 +100,7 @@ public int addValues(V targetVector) { /** * Try to add an element from the target vector to the dictionary. + * * @param targetVector the target vector containing new element. * @param targetIndex the index of the new element in the target vector. * @return the index of the new element in the dictionary. @@ -132,8 +126,8 @@ public int addValue(V targetVector, int targetIndex) { } /** - * Gets the sorted dictionary. - * Note that given the binary search tree, the sort can finish in O(n). + * Gets the sorted dictionary. Note that given the binary search tree, the sort can finish in + * O(n). */ public void populateSortedDictionary(V sortedDictionary) { int idx = 0; diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java index f5e95cf1033f5..5492676af1a2e 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/misc/PartialSumUtils.java @@ -14,26 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.misc; import org.apache.arrow.vector.BaseIntVector; -/** - * Partial sum related utilities. - */ +/** Partial sum related utilities. */ public class PartialSumUtils { /** - * Converts an input vector to a partial sum vector. - * This is an inverse operation of {@link PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. - * Suppose we have input vector a and output vector b. - * Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...). + * Converts an input vector to a partial sum vector. This is an inverse operation of {@link + * PartialSumUtils#toDeltaVector(BaseIntVector, BaseIntVector)}. Suppose we have input vector a + * and output vector b. 
Then we have b(0) = sumBase; b(i + 1) = b(i) + a(i) (i = 0, 1, 2, ...). + * * @param deltaVector the input vector. * @param partialSumVector the output vector. * @param sumBase the base of the partial sums. */ - public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) { + public static void toPartialSumVector( + BaseIntVector deltaVector, BaseIntVector partialSumVector, long sumBase) { long sum = sumBase; partialSumVector.setWithPossibleTruncate(0, sumBase); @@ -45,10 +43,10 @@ public static void toPartialSumVector(BaseIntVector deltaVector, BaseIntVector p } /** - * Converts an input vector to the delta vector. - * This is an inverse operation of {@link PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. - * Suppose we have input vector a and output vector b. - * Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...). + * Converts an input vector to the delta vector. This is an inverse operation of {@link + * PartialSumUtils#toPartialSumVector(BaseIntVector, BaseIntVector, long)}. Suppose we have input + * vector a and output vector b. Then we have b(i) = a(i + 1) - a(i) (i = 0, 1, 2, ...). + * * @param partialSumVector the input vector. * @param deltaVector the output vector. */ @@ -61,18 +59,18 @@ public static void toDeltaVector(BaseIntVector partialSumVector, BaseIntVector d } /** - * Given a value and a partial sum vector, finds its position in the partial sum vector. - * In particular, given an integer value a and partial sum vector v, we try to find a - * position i, so that v(i) <= a < v(i + 1). - * The algorithm is based on binary search, so it takes O(log(n)) time, where n is - * the length of the partial sum vector. + * Given a value and a partial sum vector, finds its position in the partial sum vector. In + * particular, given an integer value a and partial sum vector v, we try to find a position i, so + * that v(i) <= a < v(i + 1). 
The algorithm is based on binary search, so it takes O(log(n)) time, + * where n is the length of the partial sum vector. + * * @param partialSumVector the input partial sum vector. * @param value the value to search. * @return the position in the partial sum vector, if any, or -1, if none is found. */ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, long value) { - if (value < partialSumVector.getValueAsLong(0) || - value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) { + if (value < partialSumVector.getValueAsLong(0) + || value >= partialSumVector.getValueAsLong(partialSumVector.getValueCount() - 1)) { return -1; } @@ -114,6 +112,5 @@ public static int findPositionInPartialSumVector(BaseIntVector partialSumVector, throw new IllegalStateException("Should never get here"); } - private PartialSumUtils() { - } + private PartialSumUtils() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java index 43c9a5b010e8c..baa2058ffc51f 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/rank/VectorRank.java @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.rank; import java.util.stream.IntStream; - import org.apache.arrow.algorithm.sort.IndexSorter; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -28,21 +26,21 @@ /** * Utility for calculating ranks of vector elements. + * * @param the vector type */ public class VectorRank { private VectorValueComparator comparator; - /** - * Vector indices. - */ + /** Vector indices. */ private IntVector indices; private final BufferAllocator allocator; /** * Constructs a vector rank utility. 
+ * * @param allocator the allocator to use. */ public VectorRank(BufferAllocator allocator) { @@ -50,9 +48,10 @@ public VectorRank(BufferAllocator allocator) { } /** - * Given a rank r, gets the index of the element that is the rth smallest in the vector. - * The operation is performed without changing the vector, and takes O(n) time, - * where n is the length of the vector. + * Given a rank r, gets the index of the element that is the rth smallest in the vector. The + * operation is performed without changing the vector, and takes O(n) time, where n is the length + * of the vector. + * * @param vector the vector from which to get the element index. * @param comparator the criteria for vector element comparison. * @param rank the rank to determine. diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java index 6226921b22ed6..6a48019edc3eb 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/ParallelSearcher.java @@ -14,49 +14,40 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import java.util.concurrent.CompletableFuture; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; - import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; import org.apache.arrow.vector.compare.Range; import org.apache.arrow.vector.compare.RangeEqualsVisitor; /** - * Search for a value in the vector by multiple threads. - * This is often used in scenarios where the vector is large or - * low response time is required. + * Search for a value in the vector by multiple threads. 
This is often used in scenarios where the + * vector is large or low response time is required. + * * @param the vector type. */ public class ParallelSearcher { - /** - * The target vector to search. - */ + /** The target vector to search. */ private final V vector; - /** - * The thread pool. - */ + /** The thread pool. */ private final ExecutorService threadPool; - /** - * The number of threads to use. - */ + /** The number of threads to use. */ private final int numThreads; - /** - * The position of the key in the target vector, if any. - */ + /** The position of the key in the target vector, if any. */ private volatile int keyPosition = -1; /** * Constructs a parallel searcher. + * * @param vector the vector to search. * @param threadPool the thread pool to use. * @param numThreads the number of threads to use. @@ -77,17 +68,17 @@ private CompletableFuture[] initSearch() { } /** - * Search for the key in the target vector. The element-wise comparison is based on - * {@link RangeEqualsVisitor}, so there are two possible results for each element-wise - * comparison: equal and un-equal. + * Search for the key in the target vector. The element-wise comparison is based on {@link + * RangeEqualsVisitor}, so there are two possible results for each element-wise comparison: equal + * and un-equal. + * * @param keyVector the vector containing the search key. * @param keyIndex the index of the search key in the key vector. - * @return the position of a matched value in the target vector, - * or -1 if none is found. Please note that if there are multiple - * matches of the key in the target vector, this method makes no - * guarantees about which instance is returned. - * For an alternative search implementation that always finds the first match of the key, - * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. + * @return the position of a matched value in the target vector, or -1 if none is found. 
Please + * note that if there are multiple matches of the key in the target vector, this method makes + * no guarantees about which instance is returned. For an alternative search implementation + * that always finds the first match of the key, see {@link + * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. * @throws ExecutionException if an exception occurs in a thread. * @throws InterruptedException if a thread is interrupted. */ @@ -96,36 +87,38 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup final int valueCount = vector.getValueCount(); for (int i = 0; i < numThreads; i++) { final int tid = i; - Future unused = threadPool.submit(() -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. - futures[tid].complete(false); - return; - } - - RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null); - Range range = new Range(0, 0, 1); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - range.setLeftStart(pos).setRightStart(keyIndex); - if (visitor.rangeEquals(range)) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); + Future unused = + threadPool.submit( + () -> { + // convert to long to avoid overflow + int start = (int) (((long) valueCount) * tid / numThreads); + int end = (int) ((long) valueCount) * (tid + 1) / numThreads; + + if (start >= end) { + // no data assigned to this task. 
+ futures[tid].complete(false); + return; + } + + RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector, keyVector, null); + Range range = new Range(0, 0, 1); + for (int pos = start; pos < end; pos++) { + if (keyPosition != -1) { + // the key has been found by another task + futures[tid].complete(false); + return; + } + range.setLeftStart(pos).setRightStart(keyIndex); + if (visitor.rangeEquals(range)) { + keyPosition = pos; + futures[tid].complete(true); + return; + } + } + + // no match value is found. + futures[tid].complete(false); + }); } CompletableFuture.allOf(futures).get(); @@ -133,56 +126,58 @@ public int search(V keyVector, int keyIndex) throws ExecutionException, Interrup } /** - * Search for the key in the target vector. The element-wise comparison is based on - * {@link VectorValueComparator}, so there are three possible results for each element-wise - * comparison: less than, equal to and greater than. + * Search for the key in the target vector. The element-wise comparison is based on {@link + * VectorValueComparator}, so there are three possible results for each element-wise comparison: + * less than, equal to and greater than. + * * @param keyVector the vector containing the search key. * @param keyIndex the index of the search key in the key vector. * @param comparator the comparator for comparing the key against vector elements. - * @return the position of a matched value in the target vector, - * or -1 if none is found. Please note that if there are multiple - * matches of the key in the target vector, this method makes no - * guarantees about which instance is returned. - * For an alternative search implementation that always finds the first match of the key, - * see {@link VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. + * @return the position of a matched value in the target vector, or -1 if none is found. 
Please + * note that if there are multiple matches of the key in the target vector, this method makes + * no guarantees about which instance is returned. For an alternative search implementation + * that always finds the first match of the key, see {@link + * VectorSearcher#linearSearch(ValueVector, VectorValueComparator, ValueVector, int)}. * @throws ExecutionException if an exception occurs in a thread. * @throws InterruptedException if a thread is interrupted. */ - public int search( - V keyVector, int keyIndex, VectorValueComparator comparator) throws ExecutionException, InterruptedException { + public int search(V keyVector, int keyIndex, VectorValueComparator comparator) + throws ExecutionException, InterruptedException { final CompletableFuture[] futures = initSearch(); final int valueCount = vector.getValueCount(); for (int i = 0; i < numThreads; i++) { final int tid = i; - Future unused = threadPool.submit(() -> { - // convert to long to avoid overflow - int start = (int) (((long) valueCount) * tid / numThreads); - int end = (int) ((long) valueCount) * (tid + 1) / numThreads; - - if (start >= end) { - // no data assigned to this task. - futures[tid].complete(false); - return; - } - - VectorValueComparator localComparator = comparator.createNew(); - localComparator.attachVectors(vector, keyVector); - for (int pos = start; pos < end; pos++) { - if (keyPosition != -1) { - // the key has been found by another task - futures[tid].complete(false); - return; - } - if (localComparator.compare(pos, keyIndex) == 0) { - keyPosition = pos; - futures[tid].complete(true); - return; - } - } - - // no match value is found. - futures[tid].complete(false); - }); + Future unused = + threadPool.submit( + () -> { + // convert to long to avoid overflow + int start = (int) (((long) valueCount) * tid / numThreads); + int end = (int) ((long) valueCount) * (tid + 1) / numThreads; + + if (start >= end) { + // no data assigned to this task. 
+ futures[tid].complete(false); + return; + } + + VectorValueComparator localComparator = comparator.createNew(); + localComparator.attachVectors(vector, keyVector); + for (int pos = start; pos < end; pos++) { + if (keyPosition != -1) { + // the key has been found by another task + futures[tid].complete(false); + return; + } + if (localComparator.compare(pos, keyIndex) == 0) { + keyPosition = pos; + futures[tid].complete(true); + return; + } + } + + // no match value is found. + futures[tid].complete(false); + }); } CompletableFuture.allOf(futures).get(); diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java index 249194843f101..c7905dd8956c8 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorRangeSearcher.java @@ -1,108 +1,105 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.arrow.algorithm.search; - -import org.apache.arrow.algorithm.sort.VectorValueComparator; -import org.apache.arrow.vector.ValueVector; - -/** - * Search for the range of a particular element in the target vector. - */ -public class VectorRangeSearcher { - - /** - * Result returned when a search fails. - */ - public static final int SEARCH_FAIL_RESULT = -1; - - /** - * Search for the first occurrence of an element. - * The search is based on the binary search algorithm. So the target vector must be sorted. - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. - * @return the index of the first matched element if any, and -1 otherwise. - */ - public static int getFirstMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found - // continue to go left-ward - ret = mid; - high = mid - 1; - } - } - return ret; - } - - /** - * Search for the last occurrence of an element. - * The search is based on the binary search algorithm. So the target vector must be sorted. - * @param targetVector the vector from which to perform the search. - * @param comparator the criterion for the comparison. - * @param keyVector the vector containing the element to search. - * @param keyIndex the index of the search key in the key vector. - * @param the vector type. 
- * @return the index of the last matched element if any, and -1 otherwise. - */ - public static int getLastMatch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { - comparator.attachVectors(keyVector, targetVector); - - int ret = SEARCH_FAIL_RESULT; - - int low = 0; - int high = targetVector.getValueCount() - 1; - - while (low <= high) { - int mid = low + (high - low) / 2; - int result = comparator.compare(keyIndex, mid); - if (result < 0) { - // the key is smaller - high = mid - 1; - } else if (result > 0) { - // the key is larger - low = mid + 1; - } else { - // an equal element is found, - // continue to go right-ward - ret = mid; - low = mid + 1; - } - } - return ret; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.arrow.algorithm.search; + +import org.apache.arrow.algorithm.sort.VectorValueComparator; +import org.apache.arrow.vector.ValueVector; + +/** Search for the range of a particular element in the target vector. */ +public class VectorRangeSearcher { + + /** Result returned when a search fails. */ + public static final int SEARCH_FAIL_RESULT = -1; + + /** + * Search for the first occurrence of an element. 
The search is based on the binary search + * algorithm. So the target vector must be sorted. + * + * @param targetVector the vector from which to perform the search. + * @param comparator the criterion for the comparison. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of the first matched element if any, and -1 otherwise. + */ + public static int getFirstMatch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + + int ret = SEARCH_FAIL_RESULT; + + int low = 0; + int high = targetVector.getValueCount() - 1; + + while (low <= high) { + int mid = low + (high - low) / 2; + int result = comparator.compare(keyIndex, mid); + if (result < 0) { + // the key is smaller + high = mid - 1; + } else if (result > 0) { + // the key is larger + low = mid + 1; + } else { + // an equal element is found + // continue to go left-ward + ret = mid; + high = mid - 1; + } + } + return ret; + } + + /** + * Search for the last occurrence of an element. The search is based on the binary search + * algorithm. So the target vector must be sorted. + * + * @param targetVector the vector from which to perform the search. + * @param comparator the criterion for the comparison. + * @param keyVector the vector containing the element to search. + * @param keyIndex the index of the search key in the key vector. + * @param the vector type. + * @return the index of the last matched element if any, and -1 otherwise. 
+ */ + public static int getLastMatch( + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + comparator.attachVectors(keyVector, targetVector); + + int ret = SEARCH_FAIL_RESULT; + + int low = 0; + int high = targetVector.getValueCount() - 1; + + while (low <= high) { + int mid = low + (high - low) / 2; + int result = comparator.compare(keyIndex, mid); + if (result < 0) { + // the key is smaller + high = mid - 1; + } else if (result > 0) { + // the key is larger + low = mid + 1; + } else { + // an equal element is found, + // continue to go right-ward + ret = mid; + low = mid + 1; + } + } + return ret; + } +} diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java index 646bca01bb81d..dd0b4de5d8677 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/search/VectorSearcher.java @@ -14,25 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.vector.ValueVector; -/** - * Search for a particular element in the vector. - */ +/** Search for a particular element in the vector. */ public final class VectorSearcher { - /** - * Result returned when a search fails. - */ + /** Result returned when a search fails. */ public static final int SEARCH_FAIL_RESULT = -1; /** - * Search for a particular element from the key vector in the target vector by binary search. - * The target vector must be sorted. + * Search for a particular element from the key vector in the target vector by binary search. The + * target vector must be sorted. + * * @param targetVector the vector from which to perform the sort. 
* @param comparator the criterion for the sort. * @param keyVector the vector containing the element to search. @@ -41,7 +37,7 @@ public final class VectorSearcher { * @return the index of a matched element if any, and -1 otherwise. */ public static int binarySearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { comparator.attachVectors(keyVector, targetVector); // perform binary search @@ -63,7 +59,9 @@ public static int binarySearch( } /** - * Search for a particular element from the key vector in the target vector by traversing the vector in sequence. + * Search for a particular element from the key vector in the target vector by traversing the + * vector in sequence. + * * @param targetVector the vector from which to perform the search. * @param comparator the criterion for element equality. * @param keyVector the vector containing the element to search. @@ -72,7 +70,7 @@ public static int binarySearch( * @return the index of a matched element if any, and -1 otherwise. 
*/ public static int linearSearch( - V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { + V targetVector, VectorValueComparator comparator, V keyVector, int keyIndex) { comparator.attachVectors(keyVector, targetVector); for (int i = 0; i < targetVector.getValueCount(); i++) { if (comparator.compare(keyIndex, i) == 0) { @@ -82,7 +80,5 @@ public static int linearSearch( return SEARCH_FAIL_RESULT; } - private VectorSearcher() { - - } + private VectorSearcher() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java index ec74598e0eebf..77093d87bc489 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/CompositeVectorComparator.java @@ -14,20 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * A composite vector comparator compares a number of vectors - * by a number of inner comparators. - *

    - * It works by first using the first comparator, if a non-zero value - * is returned, it simply returns it. Otherwise, it uses the second comparator, - * and so on, until a non-zero value is produced, or all inner comparators have - * been used. - *

    + * A composite vector comparator compares a number of vectors by a number of inner comparators. + * + *

    It works by first using the first comparator, if a non-zero value is returned, it simply + * returns it. Otherwise, it uses the second comparator, and so on, until a non-zero value is + * produced, or all inner comparators have been used. */ public class CompositeVectorComparator extends VectorValueComparator { @@ -62,7 +58,8 @@ public int compare(int index1, int index2) { @Override public VectorValueComparator createNew() { - VectorValueComparator[] newInnerComparators = new VectorValueComparator[innerComparators.length]; + VectorValueComparator[] newInnerComparators = + new VectorValueComparator[innerComparators.length]; for (int i = 0; i < innerComparators.length; i++) { newInnerComparators[i] = innerComparators[i].createNew(); } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java index 588876aa99059..ec650cd9dc88b 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/DefaultVectorComparators.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import java.math.BigDecimal; import java.time.Duration; - import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.ByteFunctionHelpers; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -56,13 +54,12 @@ import org.apache.arrow.vector.complex.RepeatedValueVector; import org.apache.arrow.vector.holders.NullableFixedSizeBinaryHolder; -/** - * Default comparator implementations for different types of vectors. - */ +/** Default comparator implementations for different types of vectors. 
*/ public class DefaultVectorComparators { /** * Create the default comparator for the vector. + * * @param vector the vector. * @param the vector type. * @return the default comparator. @@ -104,7 +101,8 @@ public static VectorValueComparator createDefaultComp } else if (vector instanceof IntervalDayVector) { return (VectorValueComparator) new IntervalDayComparator(); } else if (vector instanceof IntervalMonthDayNanoVector) { - throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + throw new IllegalArgumentException( + "No default comparator for " + vector.getClass().getCanonicalName()); } else if (vector instanceof TimeMicroVector) { return (VectorValueComparator) new TimeMicroComparator(); } else if (vector instanceof TimeMilliVector) { @@ -122,7 +120,7 @@ public static VectorValueComparator createDefaultComp return (VectorValueComparator) new VariableWidthComparator(); } else if (vector instanceof RepeatedValueVector) { VectorValueComparator innerComparator = - createDefaultComparator(((RepeatedValueVector) vector).getDataVector()); + createDefaultComparator(((RepeatedValueVector) vector).getDataVector()); return new RepeatedValueComparator(innerComparator); } else if (vector instanceof FixedSizeListVector) { VectorValueComparator innerComparator = @@ -132,13 +130,11 @@ public static VectorValueComparator createDefaultComp return (VectorValueComparator) new NullComparator(); } - throw new IllegalArgumentException("No default comparator for " + vector.getClass().getCanonicalName()); + throw new IllegalArgumentException( + "No default comparator for " + vector.getClass().getCanonicalName()); } - /** - * Default comparator for bytes. - * The comparison is based on values, with null comes first. - */ + /** Default comparator for bytes. The comparison is based on values, with null comes first. 
*/ public static class ByteComparator extends VectorValueComparator { public ByteComparator() { @@ -159,8 +155,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for short integers. - * The comparison is based on values, with null comes first. + * Default comparator for short integers. The comparison is based on values, with null comes + * first. */ public static class ShortComparator extends VectorValueComparator { @@ -182,8 +178,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for 32-bit integers. - * The comparison is based on int values, with null comes first. + * Default comparator for 32-bit integers. The comparison is based on int values, with null comes + * first. */ public static class IntComparator extends VectorValueComparator { @@ -205,8 +201,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for long integers. - * The comparison is based on values, with null comes first. + * Default comparator for long integers. The comparison is based on values, with null comes first. */ public static class LongComparator extends VectorValueComparator { @@ -229,8 +224,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned bytes. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned bytes. The comparison is based on values, with null comes + * first. */ public static class UInt1Comparator extends VectorValueComparator { @@ -253,8 +248,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned short integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned short integer. The comparison is based on values, with null + * comes first. */ public static class UInt2Comparator extends VectorValueComparator { @@ -280,8 +275,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned integer. 
- * The comparison is based on values, with null comes first. + * Default comparator for unsigned integer. The comparison is based on values, with null comes + * first. */ public static class UInt4Comparator extends VectorValueComparator { @@ -303,8 +298,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for unsigned long integer. - * The comparison is based on values, with null comes first. + * Default comparator for unsigned long integer. The comparison is based on values, with null + * comes first. */ public static class UInt8Comparator extends VectorValueComparator { @@ -326,8 +321,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for float type. - * The comparison is based on values, with null comes first. + * Default comparator for float type. The comparison is based on values, with null comes first. */ public static class Float4Comparator extends VectorValueComparator { @@ -363,8 +357,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for double type. - * The comparison is based on values, with null comes first. + * Default comparator for double type. The comparison is based on values, with null comes first. */ public static class Float8Comparator extends VectorValueComparator { @@ -399,10 +392,7 @@ public VectorValueComparator createNew() { } } - /** - * Default comparator for bit type. - * The comparison is based on values, with null comes first. - */ + /** Default comparator for bit type. The comparison is based on values, with null comes first. */ public static class BitComparator extends VectorValueComparator { public BitComparator() { @@ -424,8 +414,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for DateDay type. - * The comparison is based on values, with null comes first. + * Default comparator for DateDay type. The comparison is based on values, with null comes first. 
*/ public static class DateDayComparator extends VectorValueComparator { @@ -447,8 +436,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for DateMilli type. - * The comparison is based on values, with null comes first. + * Default comparator for DateMilli type. The comparison is based on values, with null comes + * first. */ public static class DateMilliComparator extends VectorValueComparator { @@ -471,8 +460,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Decimal256 type. - * The comparison is based on values, with null comes first. + * Default comparator for Decimal256 type. The comparison is based on values, with null comes + * first. */ public static class Decimal256Comparator extends VectorValueComparator { @@ -495,8 +484,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Decimal type. - * The comparison is based on values, with null comes first. + * Default comparator for Decimal type. The comparison is based on values, with null comes first. */ public static class DecimalComparator extends VectorValueComparator { @@ -519,8 +507,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for Duration type. - * The comparison is based on values, with null comes first. + * Default comparator for Duration type. The comparison is based on values, with null comes first. */ public static class DurationComparator extends VectorValueComparator { @@ -543,8 +530,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for IntervalDay type. - * The comparison is based on values, with null comes first. + * Default comparator for IntervalDay type. The comparison is based on values, with null comes + * first. */ public static class IntervalDayComparator extends VectorValueComparator { @@ -567,8 +554,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeMicro type. 
- * The comparison is based on values, with null comes first. + * Default comparator for TimeMicro type. The comparison is based on values, with null comes + * first. */ public static class TimeMicroComparator extends VectorValueComparator { @@ -591,8 +578,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeMilli type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeMilli type. The comparison is based on values, with null comes + * first. */ public static class TimeMilliComparator extends VectorValueComparator { @@ -615,8 +602,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeNano type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeNano type. The comparison is based on values, with null comes first. */ public static class TimeNanoComparator extends VectorValueComparator { @@ -639,8 +625,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeSec type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeSec type. The comparison is based on values, with null comes first. */ public static class TimeSecComparator extends VectorValueComparator { @@ -663,8 +648,7 @@ public VectorValueComparator createNew() { } /** - * Default comparator for TimeSec type. - * The comparison is based on values, with null comes first. + * Default comparator for TimeSec type. The comparison is based on values, with null comes first. */ public static class TimeStampComparator extends VectorValueComparator { @@ -687,10 +671,11 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. - * The comparison is in lexicographic order, with null comes first. + * Default comparator for {@link org.apache.arrow.vector.FixedSizeBinaryVector}. 
The comparison is + * in lexicographic order, with null comes first. */ - public static class FixedSizeBinaryComparator extends VectorValueComparator { + public static class FixedSizeBinaryComparator + extends VectorValueComparator { @Override public int compare(int index1, int index2) { @@ -720,9 +705,7 @@ public VectorValueComparator createNew() { } } - /** - * Default comparator for {@link org.apache.arrow.vector.NullVector}. - */ + /** Default comparator for {@link org.apache.arrow.vector.NullVector}. */ public static class NullComparator extends VectorValueComparator { @Override public int compare(int index1, int index2) { @@ -742,8 +725,8 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. - * The comparison is in lexicographic order, with null comes first. + * Default comparator for {@link org.apache.arrow.vector.VariableWidthVector}. The comparison is + * in lexicographic order, with null comes first. */ public static class VariableWidthComparator extends VectorValueComparator { @@ -772,12 +755,13 @@ public VectorValueComparator createNew() { } /** - * Default comparator for {@link RepeatedValueVector}. - * It works by comparing the underlying vector in a lexicographic order. + * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector + * in a lexicographic order. + * * @param inner vector type. */ public static class RepeatedValueComparator - extends VectorValueComparator { + extends VectorValueComparator { private final VectorValueComparator innerComparator; @@ -823,8 +807,9 @@ public void attachVectors(RepeatedValueVector vector1, RepeatedValueVector vecto } /** - * Default comparator for {@link RepeatedValueVector}. - * It works by comparing the underlying vector in a lexicographic order. + * Default comparator for {@link RepeatedValueVector}. It works by comparing the underlying vector + * in a lexicographic order. 
+ * * @param inner vector type. */ public static class FixedSizeListComparator @@ -869,6 +854,5 @@ public void attachVectors(FixedSizeListVector vector1, FixedSizeListVector vecto } } - private DefaultVectorComparators() { - } + private DefaultVectorComparators() {} } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java index aaa7ba117c3ba..ea2b344a1eabb 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthInPlaceVectorSorter.java @@ -14,20 +14,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.BaseFixedWidthVector; /** - * Default in-place sorter for fixed-width vectors. - * It is based on quick-sort, with average time complexity O(n*log(n)). + * Default in-place sorter for fixed-width vectors. It is based on quick-sort, with average time + * complexity O(n*log(n)). + * * @param vector type. */ -public class FixedWidthInPlaceVectorSorter implements InPlaceVectorSorter { +public class FixedWidthInPlaceVectorSorter + implements InPlaceVectorSorter { /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort the data. + * If the number of items is smaller than this threshold, we will use another algorithm to sort + * the data. */ public static final int CHANGE_ALGORITHM_THRESHOLD = 15; @@ -35,15 +37,10 @@ public class FixedWidthInPlaceVectorSorter imple VectorValueComparator comparator; - /** - * The vector to sort. - */ + /** The vector to sort. */ V vec; - /** - * The buffer to hold the pivot. - * It always has length 1. - */ + /** The buffer to hold the pivot. It always has length 1. 
*/ V pivotBuffer; @Override @@ -99,9 +96,7 @@ private void quickSort() { } } - /** - * Select the pivot as the median of 3 samples. - */ + /** Select the pivot as the median of 3 samples. */ void choosePivot(int low, int high) { // we need at least 3 items if (high - low + 1 < STOP_CHOOSING_PIVOT_THRESHOLD) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java index 05a4585792dc2..817e890a5abe1 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/FixedWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.ArrowBuf; @@ -26,18 +25,21 @@ import org.apache.arrow.vector.IntVector; /** - * Default out-of-place sorter for fixed-width vectors. - * It is an out-of-place sort, with time complexity O(n*log(n)). + * Default out-of-place sorter for fixed-width vectors. It is an out-of-place sort, with time + * complexity O(n*log(n)). + * * @param vector type. 
*/ -public class FixedWidthOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter { +public class FixedWidthOutOfPlaceVectorSorter + implements OutOfPlaceVectorSorter { protected IndexSorter indexSorter = new IndexSorter<>(); @Override public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { if (srcVector instanceof BitVector) { - throw new IllegalArgumentException("BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); + throw new IllegalArgumentException( + "BitVector is not supported with FixedWidthOutOfPlaceVectorSorter."); } comparator.attachVector(srcVector); @@ -49,15 +51,18 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co ArrowBuf dstValueBuffer = dstVector.getDataBuffer(); // check buffer size - Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity()); + Preconditions.checkArgument( + dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), + "Not enough capacity for the validity buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 7) / 8, + dstValidityBuffer.capacity()); Preconditions.checkArgument( dstValueBuffer.capacity() >= srcVector.getValueCount() * ((long) srcVector.getTypeWidth()), - "Not enough capacity for the data buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - srcVector.getValueCount() * srcVector.getTypeWidth(), dstValueBuffer.capacity()); + "Not enough capacity for the data buffer of the dst vector. 
" + + "Expected capacity %s, actual capacity %s", + srcVector.getValueCount() * srcVector.getTypeWidth(), + dstValueBuffer.capacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { @@ -73,9 +78,9 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co } else { BitVectorHelper.setBit(dstValidityBuffer, dstIndex); MemoryUtil.UNSAFE.copyMemory( - srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth), - dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth), - valueWidth); + srcValueBuffer.memoryAddress() + srcIndex * ((long) valueWidth), + dstValueBuffer.memoryAddress() + dstIndex * ((long) valueWidth), + valueWidth); } } } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java index 9ea39f638aebe..18f5e94314f83 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/GeneralOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.util.Preconditions; @@ -22,23 +21,26 @@ import org.apache.arrow.vector.ValueVector; /** - * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). - * Since it does not make any assumptions about the memory layout of the vector, its performance - * can be sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), - * it should be used in preference. + * An out-of-place sorter for vectors of arbitrary type, with time complexity O(n*log(n)). 
Since it + * does not make any assumptions about the memory layout of the vector, its performance can be + * sub-optimal. So if another sorter is applicable ({@link FixedWidthInPlaceVectorSorter}), it + * should be used in preference. * * @param vector type. */ -public class GeneralOutOfPlaceVectorSorter implements OutOfPlaceVectorSorter { +public class GeneralOutOfPlaceVectorSorter + implements OutOfPlaceVectorSorter { @Override public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator comparator) { comparator.attachVector(srcVector); // check vector capacity - Preconditions.checkArgument(dstVector.getValueCapacity() >= srcVector.getValueCount(), - "Not enough capacity for the target vector. " + - "Expected capacity %s, actual capacity %s", srcVector.getValueCount(), dstVector.getValueCapacity()); + Preconditions.checkArgument( + dstVector.getValueCapacity() >= srcVector.getValueCount(), + "Not enough capacity for the target vector. " + "Expected capacity %s, actual capacity %s", + srcVector.getValueCount(), + dstVector.getValueCapacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java index 19817fe76b8ec..ba41bb9e4eac7 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InPlaceVectorSorter.java @@ -14,15 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Basic interface for sorting a vector in-place. - * That is, the sorting is performed by modifying the input vector, - * without creating a new sorted vector. 
+ * Basic interface for sorting a vector in-place. That is, the sorting is performed by modifying the + * input vector, without creating a new sorted vector. * * @param the vector type. */ @@ -30,6 +28,7 @@ public interface InPlaceVectorSorter { /** * Sort a vector in-place. + * * @param vec the vector to sort. * @param comparator the criteria for sort. */ diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java index 3072717f43123..b8ce3289d2889 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/IndexSorter.java @@ -14,39 +14,35 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.stream.IntStream; - import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; /** * Sorter for the indices of a vector. + * * @param vector type. */ public class IndexSorter { /** - * If the number of items is smaller than this threshold, we will use another algorithm to sort the data. + * If the number of items is smaller than this threshold, we will use another algorithm to sort + * the data. */ public static final int CHANGE_ALGORITHM_THRESHOLD = 15; - /** - * Comparator for vector indices. - */ + /** Comparator for vector indices. */ private VectorValueComparator comparator; - /** - * Vector indices to sort. - */ + /** Vector indices to sort. */ private IntVector indices; /** - * Sorts indices, by quick-sort. Suppose the vector is denoted by v. - * After calling this method, the following relations hold: - * v(indices[0]) <= v(indices[1]) <= ... + * Sorts indices, by quick-sort. Suppose the vector is denoted by v. After calling this method, + * the following relations hold: v(indices[0]) <= v(indices[1]) <= ... 
+ * * @param vector the vector whose indices need to be sorted. * @param indices the vector for storing the sorted indices. * @param comparator the comparator to sort indices. @@ -100,11 +96,9 @@ private void quickSort() { } } - /** - * Select the pivot as the median of 3 samples. - */ + /** Select the pivot as the median of 3 samples. */ static int choosePivot( - int low, int high, IntVector indices, VectorValueComparator comparator) { + int low, int high, IntVector indices, VectorValueComparator comparator) { // we need at least 3 items if (high - low + 1 < FixedWidthInPlaceVectorSorter.STOP_CHOOSING_PIVOT_THRESHOLD) { return indices.get(low); @@ -149,8 +143,9 @@ static int choosePivot( /** * Partition a range of values in a vector into two parts, with elements in one part smaller than - * elements from the other part. The partition is based on the element indices, so it does - * not modify the underlying vector. + * elements from the other part. The partition is based on the element indices, so it does not + * modify the underlying vector. + * * @param low the lower bound of the range. * @param high the upper bound of the range. * @param indices vector element indices. @@ -159,7 +154,7 @@ static int choosePivot( * @return the index of the split point. 
*/ public static int partition( - int low, int high, IntVector indices, VectorValueComparator comparator) { + int low, int high, IntVector indices, VectorValueComparator comparator) { int pivotIndex = choosePivot(low, high, indices, comparator); while (low < high) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java index dc12a5fefdb65..c058636d66d1e 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/InsertionSorter.java @@ -14,27 +14,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.BaseFixedWidthVector; import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.ValueVector; -/** - * Insertion sorter. - */ +/** Insertion sorter. */ class InsertionSorter { /** * Sorts the range of a vector by insertion sort. * - * @param vector the vector to be sorted. - * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). - * @param buffer an extra buffer with capacity 1 to hold the current key. + * @param vector the vector to be sorted. + * @param startIdx the start index of the range (inclusive). + * @param endIdx the end index of the range (inclusive). + * @param buffer an extra buffer with capacity 1 to hold the current key. * @param comparator the criteria for vector element comparison. - * @param the vector type. + * @param the vector type. */ static void insertionSort( V vector, int startIdx, int endIdx, VectorValueComparator comparator, V buffer) { @@ -53,11 +50,11 @@ static void insertionSort( /** * Sorts the range of vector indices by insertion sort. * - * @param indices the vector indices. 
- * @param startIdx the start index of the range (inclusive). - * @param endIdx the end index of the range (inclusive). + * @param indices the vector indices. + * @param startIdx the start index of the range (inclusive). + * @param endIdx the end index of the range (inclusive). * @param comparator the criteria for vector element comparison. - * @param the vector type. + * @param the vector type. */ static void insertionSort( IntVector indices, int startIdx, int endIdx, VectorValueComparator comparator) { diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java index df96121f1f8f7..ccb7bea4e2bd3 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OffHeapIntStack.java @@ -14,15 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.vector.IntVector; -/** - * An off heap implementation of stack with int elements. - */ +/** An off heap implementation of stack with int elements. */ class OffHeapIntStack implements AutoCloseable { private static final int INIT_SIZE = 128; diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java index 41d6dadc49147..b18e9b35d0895 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/OutOfPlaceVectorSorter.java @@ -14,21 +14,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Basic interface for sorting a vector out-of-place. - * That is, the sorting is performed on a newly-created vector, - * and the original vector is not modified. + * Basic interface for sorting a vector out-of-place. That is, the sorting is performed on a + * newly-created vector, and the original vector is not modified. + * * @param the vector type. */ public interface OutOfPlaceVectorSorter { /** * Sort a vector out-of-place. + * * @param inVec the input vector. * @param outVec the output vector, which has the same size as the input vector. * @param comparator the criteria for sort. diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java index 0b0c3bd55b271..3fcfa5f8f215c 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/StableVectorComparator.java @@ -14,17 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.util.Preconditions; import org.apache.arrow.vector.ValueVector; /** - * Stable sorter. It compares values like ordinary comparators. - * However, when values are equal, it breaks ties by the value indices. - * Therefore, sort algorithms using this comparator always produce + * Stable sorter. It compares values like ordinary comparators. However, when values are equal, it + * breaks ties by the value indices. Therefore, sort algorithms using this comparator always produce * stable sort results. + * * @param type of the vector. 
*/ public class StableVectorComparator extends VectorValueComparator { @@ -33,6 +32,7 @@ public class StableVectorComparator extends VectorValueCo /** * Constructs a stable comparator from a given comparator. + * * @param innerComparator the comparator to convert to stable comparator.. */ public StableVectorComparator(VectorValueComparator innerComparator) { @@ -47,8 +47,9 @@ public void attachVector(V vector) { @Override public void attachVectors(V vector1, V vector2) { - Preconditions.checkArgument(vector1 == vector2, - "Stable comparator only supports comparing values from the same vector"); + Preconditions.checkArgument( + vector1 == vector2, + "Stable comparator only supports comparing values from the same vector"); super.attachVectors(vector1, vector2); innerComparator.attachVectors(vector1, vector2); } diff --git a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java index 863b07c348ef2..8f58dc0dcee0f 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VariableWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.memory.ArrowBuf; @@ -25,12 +24,13 @@ import org.apache.arrow.vector.IntVector; /** - * Default sorter for variable-width vectors. - * It is an out-of-place sort, with time complexity O(n*log(n)). + * Default sorter for variable-width vectors. It is an out-of-place sort, with time complexity + * O(n*log(n)). + * * @param vector type. 
*/ public class VariableWidthOutOfPlaceVectorSorter - implements OutOfPlaceVectorSorter { + implements OutOfPlaceVectorSorter { protected IndexSorter indexSorter = new IndexSorter<>(); @@ -46,20 +46,29 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co ArrowBuf dstOffsetBuffer = dstVector.getOffsetBuffer(); // check buffer size - Preconditions.checkArgument(dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), - "Not enough capacity for the validity buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 7) / 8, dstValidityBuffer.capacity()); Preconditions.checkArgument( - dstOffsetBuffer.capacity() >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), - "Not enough capacity for the offset buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", - (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, dstOffsetBuffer.capacity()); - long dataSize = srcVector.getOffsetBuffer().getInt( - srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); + dstValidityBuffer.capacity() * 8 >= srcVector.getValueCount(), + "Not enough capacity for the validity buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 7) / 8, + dstValidityBuffer.capacity()); + Preconditions.checkArgument( + dstOffsetBuffer.capacity() + >= (srcVector.getValueCount() + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), + "Not enough capacity for the offset buffer of the dst vector. 
" + + "Expected capacity %s, actual capacity %s", + (srcVector.getValueCount() + 1) * BaseVariableWidthVector.OFFSET_WIDTH, + dstOffsetBuffer.capacity()); + long dataSize = + srcVector + .getOffsetBuffer() + .getInt(srcVector.getValueCount() * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); Preconditions.checkArgument( - dstValueBuffer.capacity() >= dataSize, "No enough capacity for the data buffer of the dst vector. " + - "Expected capacity %s, actual capacity %s", dataSize, dstValueBuffer.capacity()); + dstValueBuffer.capacity() >= dataSize, + "No enough capacity for the data buffer of the dst vector. " + + "Expected capacity %s, actual capacity %s", + dataSize, + dstValueBuffer.capacity()); // sort value indices try (IntVector sortedIndices = new IntVector("", srcVector.getAllocator())) { @@ -77,16 +86,19 @@ public void sortOutOfPlace(V srcVector, V dstVector, VectorValueComparator co BitVectorHelper.unsetBit(dstValidityBuffer, dstIndex); } else { BitVectorHelper.setBit(dstValidityBuffer, dstIndex); - int srcOffset = srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); + int srcOffset = + srcOffsetBuffer.getInt(srcIndex * ((long) BaseVariableWidthVector.OFFSET_WIDTH)); int valueLength = - srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) - srcOffset; + srcOffsetBuffer.getInt((srcIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH)) + - srcOffset; MemoryUtil.UNSAFE.copyMemory( - srcValueBuffer.memoryAddress() + srcOffset, - dstValueBuffer.memoryAddress() + dstOffset, - valueLength); + srcValueBuffer.memoryAddress() + srcOffset, + dstValueBuffer.memoryAddress() + dstOffset, + valueLength); dstOffset += valueLength; } - dstOffsetBuffer.setInt((dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset); + dstOffsetBuffer.setInt( + (dstIndex + 1) * ((long) BaseVariableWidthVector.OFFSET_WIDTH), dstOffset); } } } diff --git 
a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java index d2c772ca8a819..0472f04109b1c 100644 --- a/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java +++ b/java/algorithm/src/main/java/org/apache/arrow/algorithm/sort/VectorValueComparator.java @@ -14,54 +14,44 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import org.apache.arrow.vector.ValueVector; /** - * Compare two values at the given indices in the vectors. - * This is used for vector sorting. + * Compare two values at the given indices in the vectors. This is used for vector sorting. + * * @param type of the vector. */ public abstract class VectorValueComparator { - /** - * The first vector to compare. - */ + /** The first vector to compare. */ protected V vector1; - /** - * The second vector to compare. - */ + /** The second vector to compare. */ protected V vector2; - /** - * Width of the vector value. For variable-length vectors, this value makes no sense. - */ + /** Width of the vector value. For variable-length vectors, this value makes no sense. */ protected int valueWidth; - private boolean checkNullsOnCompare = true; /** - * This value is true by default and re-computed when vectors are attached to the comparator. If both vectors cannot - * contain nulls then this value is {@code false} and calls to {@code compare(i1, i2)} are short-circuited - * to {@code compareNotNull(i1, i2)} thereby speeding up comparisons resulting in faster sorts etc. + * This value is true by default and re-computed when vectors are attached to the comparator. 
If + * both vectors cannot contain nulls then this value is {@code false} and calls to {@code + * compare(i1, i2)} are short-circuited to {@code compareNotNull(i1, i2)} thereby speeding up + * comparisons resulting in faster sorts etc. */ public boolean checkNullsOnCompare() { return this.checkNullsOnCompare; } - /** - * Constructor for variable-width vectors. - */ - protected VectorValueComparator() { - - } + /** Constructor for variable-width vectors. */ + protected VectorValueComparator() {} /** * Constructor for fixed-width vectors. + * * @param valueWidth the record width (in bytes). */ protected VectorValueComparator(int valueWidth) { @@ -74,6 +64,7 @@ public int getValueWidth() { /** * Attach both vectors to compare to the same input vector. + * * @param vector the vector to attach. */ public void attachVector(V vector) { @@ -82,6 +73,7 @@ public void attachVector(V vector) { /** * Attach vectors to compare. + * * @param vector1 the first vector to compare. * @param vector2 the second vector to compare. */ @@ -99,7 +91,7 @@ private boolean mayHaveNulls(V v) { if (v.getValueCount() == 0) { return true; } - if (! v.getField().isNullable()) { + if (!v.getField().isNullable()) { return false; } return v.getNullCount() > 0; @@ -107,11 +99,11 @@ private boolean mayHaveNulls(V v) { /** * Compare two values, given their indices. + * * @param index1 index of the first value to compare. * @param index2 index of the second value to compare. - * @return an integer greater than 0, if the first value is greater; - * an integer smaller than 0, if the first value is smaller; or 0, if both - * values are equal. + * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if + * the first value is smaller; or 0, if both values are equal. */ public int compare(int index1, int index2) { if (checkNullsOnCompare) { @@ -133,19 +125,19 @@ public int compare(int index1, int index2) { } /** - * Compare two values, given their indices. 
- * This is a fast path for comparing non-null values, so the caller - * must make sure that values at both indices are not null. + * Compare two values, given their indices. This is a fast path for comparing non-null values, so + * the caller must make sure that values at both indices are not null. + * * @param index1 index of the first value to compare. * @param index2 index of the second value to compare. - * @return an integer greater than 0, if the first value is greater; - * an integer smaller than 0, if the first value is smaller; or 0, if both - * values are equal. + * @return an integer greater than 0, if the first value is greater; an integer smaller than 0, if + * the first value is smaller; or 0, if both values are equal. */ public abstract int compareNotNull(int index1, int index2); /** * Creates a comparator of the same type. + * * @return the newly created comparator. */ public abstract VectorValueComparator createNew(); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java index ac083b84f1611..537189013a731 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestDeduplicationUtils.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link DeduplicationUtils}. 
- */ +/** Test cases for {@link DeduplicationUtils}. */ public class TestDeduplicationUtils { private static final int VECTOR_LENGTH = 100; @@ -57,10 +53,11 @@ public void shutdown() { @Test public void testDeduplicateFixedWidth() { try (IntVector origVec = new IntVector("original vec", allocator); - IntVector dedupVec = new IntVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { + IntVector dedupVec = new IntVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + ArrowBuf distinctBuf = + allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -73,9 +70,10 @@ public void testDeduplicateFixedWidth() { } DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals( VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); + assertEquals( + VECTOR_LENGTH, + VECTOR_LENGTH * REPETITION_COUNT + - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); @@ -84,7 +82,8 @@ public void testDeduplicateFixedWidth() { assertEquals(i, dedupVec.get(i)); } - DeduplicationUtils.populateRunLengths(distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); + DeduplicationUtils.populateRunLengths( + distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { @@ -96,12 +95,12 @@ public void testDeduplicateFixedWidth() { @Test public void 
testDeduplicateVariableWidth() { try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - ArrowBuf distinctBuf = allocator.buffer( - DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { - origVec.allocateNew( - VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); + VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + ArrowBuf distinctBuf = + allocator.buffer( + DataSizeRoundingUtil.divideBy8Ceil(VECTOR_LENGTH * REPETITION_COUNT))) { + origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -114,9 +113,10 @@ public void testDeduplicateVariableWidth() { } DeduplicationUtils.populateRunStartIndicators(origVec, distinctBuf); - assertEquals(VECTOR_LENGTH, - VECTOR_LENGTH * REPETITION_COUNT - - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); + assertEquals( + VECTOR_LENGTH, + VECTOR_LENGTH * REPETITION_COUNT + - BitVectorHelper.getNullCount(distinctBuf, VECTOR_LENGTH * REPETITION_COUNT)); DeduplicationUtils.populateDeduplicatedValues(distinctBuf, origVec, dedupVec); assertEquals(VECTOR_LENGTH, dedupVec.getValueCount()); @@ -126,7 +126,7 @@ public void testDeduplicateVariableWidth() { } DeduplicationUtils.populateRunLengths( - distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); + distinctBuf, lengthVec, VECTOR_LENGTH * REPETITION_COUNT); assertEquals(VECTOR_LENGTH, lengthVec.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java index 788213b162870..820cadccae537 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/deduplicate/TestVectorRunDeduplicator.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.deduplicate; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -30,9 +28,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link VectorRunDeduplicator}. - */ +/** Test cases for {@link VectorRunDeduplicator}. */ public class TestVectorRunDeduplicator { private static final int VECTOR_LENGTH = 100; @@ -57,7 +53,7 @@ public void testDeduplicateFixedWidth() { IntVector dedupVec = new IntVector("deduplicated vec", allocator); IntVector lengthVec = new IntVector("length vec", allocator); VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { + new VectorRunDeduplicator<>(origVec, allocator)) { origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); @@ -93,12 +89,11 @@ public void testDeduplicateFixedWidth() { @Test public void testDeduplicateVariableWidth() { try (VarCharVector origVec = new VarCharVector("original vec", allocator); - VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); - IntVector lengthVec = new IntVector("length vec", allocator); - VectorRunDeduplicator deduplicator = - new VectorRunDeduplicator<>(origVec, allocator)) { - origVec.allocateNew( 
- VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); + VarCharVector dedupVec = new VarCharVector("deduplicated vec", allocator); + IntVector lengthVec = new IntVector("length vec", allocator); + VectorRunDeduplicator deduplicator = + new VectorRunDeduplicator<>(origVec, allocator)) { + origVec.allocateNew(VECTOR_LENGTH * REPETITION_COUNT * 10, VECTOR_LENGTH * REPETITION_COUNT); origVec.setValueCount(VECTOR_LENGTH * REPETITION_COUNT); lengthVec.allocateNew(); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java index 45c47626b720e..bfda86f26883d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableBasedDictionaryBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -23,7 +22,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link HashTableBasedDictionaryBuilder}. - */ +/** Test cases for {@link HashTableBasedDictionaryBuilder}. 
*/ public class TestHashTableBasedDictionaryBuilder { private BufferAllocator allocator; @@ -52,7 +48,7 @@ public void shutdown() { @Test public void testBuildVariableWidthDictionaryWithNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -72,27 +68,34 @@ public void testBuildVariableWidthDictionaryWithNull() { vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); + new HashTableBasedDictionaryBuilder<>(dictionary, true); int result = dictionaryBuilder.addValues(vec); assertEquals(7, result); assertEquals(7, dictionary.getValueCount()); - assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); assertNull(dictionary.get(2)); - assertEquals("world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new 
String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(dictionary.get(6)), StandardCharsets.UTF_8)); } } @Test public void testBuildVariableWidthDictionaryWithoutNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -112,27 +115,33 @@ public void testBuildVariableWidthDictionaryWithoutNull() { vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); + new HashTableBasedDictionaryBuilder<>(dictionary, false); int result = dictionaryBuilder.addValues(vec); assertEquals(6, result); assertEquals(6, dictionary.getValueCount()); - assertEquals("hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); - + assertEquals( + "hello", new String(Objects.requireNonNull(dictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(dictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(dictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(dictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new 
String(Objects.requireNonNull(dictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(dictionary.get(5)), StandardCharsets.UTF_8)); } } @Test public void testBuildFixedWidthDictionaryWithNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -151,7 +160,7 @@ public void testBuildFixedWidthDictionaryWithNull() { vec.setNull(9); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, true); + new HashTableBasedDictionaryBuilder<>(dictionary, true); int result = dictionaryBuilder.addValues(vec); @@ -169,7 +178,7 @@ public void testBuildFixedWidthDictionaryWithNull() { @Test public void testBuildFixedWidthDictionaryWithoutNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -188,7 +197,7 @@ public void testBuildFixedWidthDictionaryWithoutNull() { vec.setNull(9); HashTableBasedDictionaryBuilder dictionaryBuilder = - new HashTableBasedDictionaryBuilder<>(dictionary, false); + new HashTableBasedDictionaryBuilder<>(dictionary, false); int result = dictionaryBuilder.addValues(vec); @@ -199,7 +208,6 @@ public void testBuildFixedWidthDictionaryWithoutNull() { assertEquals(8, dictionary.get(1)); assertEquals(32, dictionary.get(2)); assertEquals(16, dictionary.get(3)); - } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java index 60efbf58bebda..b9646284a015b 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java +++ 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestHashTableDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -38,9 +36,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link HashTableDictionaryEncoder}. - */ +/** Test cases for {@link HashTableDictionaryEncoder}. */ public class TestHashTableDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -53,7 +49,7 @@ public class TestHashTableDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -69,8 +65,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -89,7 +85,7 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, false); + new HashTableDictionaryEncoder<>(dictionary, false); // perform encoding encodedVector.allocateNew(); @@ -98,17 +94,21 @@ public 
void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -119,8 +119,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -144,7 +144,7 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); + new HashTableDictionaryEncoder<>(dictionary, true); // perform encoding encodedVector.allocateNew(); @@ -156,20 +156,24 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) 
{ assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -180,8 +184,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -199,13 +203,15 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionary, true); + new HashTableDictionaryEncoder<>(dictionary, true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. 
- assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -213,8 +219,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -235,7 +241,7 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); // verify indices @@ -262,8 +268,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -281,7 +287,7 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -305,8 +311,8 @@ public void testEncodeLargeVector() { public void testEncodeBinaryVector() { 
// Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -327,7 +333,7 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); HashTableDictionaryEncoder encoder = - new HashTableDictionaryEncoder<>(dictionaryVector); + new HashTableDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java index a76aedffa308d..a4641704198cb 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestLinearDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -39,9 +37,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link LinearDictionaryEncoder}. - */ +/** Test cases for {@link LinearDictionaryEncoder}. 
*/ public class TestLinearDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -54,7 +50,7 @@ public class TestLinearDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -70,8 +66,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -90,7 +86,7 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, false); + new LinearDictionaryEncoder<>(dictionary, false); // perform encoding encodedVector.allocateNew(); @@ -99,17 +95,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), 
decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -120,8 +120,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -145,7 +145,7 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); + new LinearDictionaryEncoder<>(dictionary, true); // perform encoding encodedVector.allocateNew(); @@ -157,13 +157,16 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); @@ -171,7 +174,8 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { 
assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -182,8 +186,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -201,13 +205,15 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionary, true); + new LinearDictionaryEncoder<>(dictionary, true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. 
- assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -215,8 +221,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -237,7 +243,7 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); // verify indices @@ -263,8 +269,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -282,7 +288,7 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -306,8 +312,8 @@ public void testEncodeLargeVector() { public void testEncodeBinaryVector() { // Create a new 
value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -328,7 +334,7 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); LinearDictionaryEncoder encoder = - new LinearDictionaryEncoder<>(dictionaryVector); + new LinearDictionaryEncoder<>(dictionaryVector); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java index e01c2e7905b46..e783e1f76818c 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchDictionaryEncoder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.dictionary; import static junit.framework.TestCase.assertTrue; @@ -25,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Random; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; @@ -40,9 +38,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link SearchDictionaryEncoder}. - */ +/** Test cases for {@link SearchDictionaryEncoder}. 
*/ public class TestSearchDictionaryEncoder { private final int VECTOR_LENGTH = 50; @@ -55,7 +51,7 @@ public class TestSearchDictionaryEncoder { byte[] one = "111".getBytes(StandardCharsets.UTF_8); byte[] two = "222".getBytes(StandardCharsets.UTF_8); - byte[][] data = new byte[][]{zero, one, two}; + byte[][] data = new byte[][] {zero, one, two}; @Before public void prepare() { @@ -71,8 +67,8 @@ public void shutdown() { public void testEncodeAndDecode() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -91,8 +87,8 @@ public void testEncodeAndDecode() { rawVector.setValueCount(VECTOR_LENGTH); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), false); // perform encoding encodedVector.allocateNew(); @@ -101,17 +97,21 @@ public void testEncodeAndDecode() { // verify encoding results assertEquals(rawVector.getValueCount(), encodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + 
(VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); for (int i = 0; i < VECTOR_LENGTH; i++) { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -122,8 +122,8 @@ public void testEncodeAndDecode() { public void testEncodeAndDecodeWithNull() { Random random = new Random(); try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary dictionary.allocateNew(); @@ -147,8 +147,8 @@ public void testEncodeAndDecodeWithNull() { rawVector.setValueCount(VECTOR_LENGTH); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); // perform encoding encodedVector.allocateNew(); @@ -160,13 +160,16 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertEquals(0, encodedVector.get(i)); } else { - assertArrayEquals(rawVector.get(i), String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + rawVector.get(i), + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8)); } } // perform decoding Dictionary dict = new Dictionary(dictionary, new DictionaryEncoding(1L, false, null)); - try (VarCharVector decodedVector = (VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { + try (VarCharVector decodedVector = + 
(VarCharVector) DictionaryEncoder.decode(encodedVector, dict)) { // verify decoding results assertEquals(encodedVector.getValueCount(), decodedVector.getValueCount()); @@ -174,7 +177,8 @@ public void testEncodeAndDecodeWithNull() { if (i % 10 == 0) { assertTrue(decodedVector.isNull(i)); } else { - assertArrayEquals(String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), + assertArrayEquals( + String.valueOf(encodedVector.get(i)).getBytes(StandardCharsets.UTF_8), decodedVector.get(i)); } } @@ -185,8 +189,8 @@ public void testEncodeAndDecodeWithNull() { @Test public void testEncodeNullWithoutNullInDictionary() { try (VarCharVector rawVector = new VarCharVector("original vector", allocator); - IntVector encodedVector = new IntVector("encoded vector", allocator); - VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { + IntVector encodedVector = new IntVector("encoded vector", allocator); + VarCharVector dictionary = new VarCharVector("dictionary", allocator)) { // set up dictionary, with no null in it. dictionary.allocateNew(); @@ -204,14 +208,16 @@ public void testEncodeNullWithoutNullInDictionary() { encodedVector.allocateNew(); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); + new SearchDictionaryEncoder<>( + dictionary, DefaultVectorComparators.createDefaultComparator(rawVector), true); // the encoder should encode null, but no null in the dictionary, // so an exception should be thrown. 
- assertThrows(IllegalArgumentException.class, () -> { - encoder.encode(rawVector, encodedVector); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + encoder.encode(rawVector, encodedVector); + }); } } @@ -219,8 +225,8 @@ public void testEncodeNullWithoutNullInDictionary() { public void testEncodeStrings() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(512, 5); encoded.allocateNew(); @@ -241,8 +247,8 @@ public void testEncodeStrings() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); // verify indices @@ -268,8 +274,8 @@ public void testEncodeStrings() { public void testEncodeLargeVector() { // Create a new value vector try (final VarCharVector vector = new VarCharVector("foo", allocator); - final IntVector encoded = new IntVector("encoded", allocator); - final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { + final IntVector encoded = new IntVector("encoded", allocator); + final VarCharVector dictionaryVector = new VarCharVector("dict", allocator)) { vector.allocateNew(); encoded.allocateNew(); @@ -287,8 +293,8 @@ public void testEncodeLargeVector() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, 
DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); assertEquals(count, encoded.getValueCount()); @@ -312,8 +318,8 @@ public void testEncodeLargeVector() { public void testEncodeBinaryVector() { // Create a new value vector try (final VarBinaryVector vector = new VarBinaryVector("foo", allocator); - final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); - final IntVector encoded = new IntVector("encoded", allocator)) { + final VarBinaryVector dictionaryVector = new VarBinaryVector("dict", allocator); + final IntVector encoded = new IntVector("encoded", allocator)) { vector.allocateNew(512, 5); vector.allocateNew(); encoded.allocateNew(); @@ -334,8 +340,8 @@ public void testEncodeBinaryVector() { dictionaryVector.setValueCount(3); SearchDictionaryEncoder encoder = - new SearchDictionaryEncoder<>( - dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); + new SearchDictionaryEncoder<>( + dictionaryVector, DefaultVectorComparators.createDefaultComparator(vector)); encoder.encode(vector, encoded); assertEquals(5, encoded.getValueCount()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java index 340b7e67e861f..6c8a57c1a4648 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/dictionary/TestSearchTreeBasedDictionaryBuilder.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.dictionary; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link SearchTreeBasedDictionaryBuilder}. - */ +/** Test cases for {@link SearchTreeBasedDictionaryBuilder}. */ public class TestSearchTreeBasedDictionaryBuilder { private BufferAllocator allocator; @@ -53,8 +49,8 @@ public void shutdown() { @Test public void testBuildVariableWidthDictionaryWithNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator); + VarCharVector sortedDictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -74,9 +70,10 @@ public void testBuildVariableWidthDictionaryWithNull() { vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); int result = dictionaryBuilder.addValues(vec); @@ -86,20 +83,32 @@ public void testBuildVariableWidthDictionaryWithNull() { dictionaryBuilder.populateSortedDictionary(sortedDictionary); assertTrue(sortedDictionary.isNull(0)); - assertEquals("12", new 
String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "12", + new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "abc", + new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "good", + new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "hello", + new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "world", + new String(Objects.requireNonNull(sortedDictionary.get(6)), StandardCharsets.UTF_8)); } } @Test public void testBuildVariableWidthDictionaryWithoutNull() { try (VarCharVector vec = new VarCharVector("", allocator); - VarCharVector dictionary = new VarCharVector("", allocator); - VarCharVector sortedDictionary = new VarCharVector("", allocator)) { + VarCharVector dictionary = new VarCharVector("", allocator); + VarCharVector sortedDictionary = new VarCharVector("", allocator)) { vec.allocateNew(100, 10); vec.setValueCount(10); @@ -119,9 +128,10 @@ public void testBuildVariableWidthDictionaryWithoutNull() { vec.set(8, "good".getBytes(StandardCharsets.UTF_8)); vec.set(9, "abc".getBytes(StandardCharsets.UTF_8)); - VectorValueComparator comparator = 
DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); int result = dictionaryBuilder.addValues(vec); @@ -130,20 +140,32 @@ public void testBuildVariableWidthDictionaryWithoutNull() { dictionaryBuilder.populateSortedDictionary(sortedDictionary); - assertEquals("12", new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "12", + new String(Objects.requireNonNull(sortedDictionary.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "abc", + new String(Objects.requireNonNull(sortedDictionary.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new String(Objects.requireNonNull(sortedDictionary.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "good", + new String(Objects.requireNonNull(sortedDictionary.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "hello", + new String(Objects.requireNonNull(sortedDictionary.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "world", + new String(Objects.requireNonNull(sortedDictionary.get(5)), StandardCharsets.UTF_8)); } } @Test public void testBuildFixedWidthDictionaryWithNull() { try (IntVector vec = new IntVector("", 
allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator); + IntVector sortedDictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -162,9 +184,10 @@ public void testBuildFixedWidthDictionaryWithNull() { vec.set(8, 4); vec.setNull(9); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, true); int result = dictionaryBuilder.addValues(vec); @@ -184,8 +207,8 @@ public void testBuildFixedWidthDictionaryWithNull() { @Test public void testBuildFixedWidthDictionaryWithoutNull() { try (IntVector vec = new IntVector("", allocator); - IntVector dictionary = new IntVector("", allocator); - IntVector sortedDictionary = new IntVector("", allocator)) { + IntVector dictionary = new IntVector("", allocator); + IntVector sortedDictionary = new IntVector("", allocator)) { vec.allocateNew(10); vec.setValueCount(10); @@ -204,9 +227,10 @@ public void testBuildFixedWidthDictionaryWithoutNull() { vec.set(8, 4); vec.setNull(9); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SearchTreeBasedDictionaryBuilder dictionaryBuilder = - new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); + new SearchTreeBasedDictionaryBuilder<>(dictionary, comparator, false); int result = dictionaryBuilder.addValues(vec); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java 
b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java index 630dd80b44084..e3ab981670e9e 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/misc/TestPartialSumUtils.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.misc; import static org.junit.Assert.assertEquals; @@ -26,9 +25,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link PartialSumUtils}. - */ +/** Test cases for {@link PartialSumUtils}. */ public class TestPartialSumUtils { private static final int PARTIAL_SUM_VECTOR_LENGTH = 101; @@ -50,7 +47,7 @@ public void shutdown() { @Test public void testToPartialSumVector() { try (IntVector delta = new IntVector("delta", allocator); - IntVector partialSum = new IntVector("partial sum", allocator)) { + IntVector partialSum = new IntVector("partial sum", allocator)) { delta.allocateNew(DELTA_VECTOR_LENGTH); delta.setValueCount(DELTA_VECTOR_LENGTH); @@ -75,7 +72,7 @@ public void testToPartialSumVector() { @Test public void testToDeltaVector() { try (IntVector partialSum = new IntVector("partial sum", allocator); - IntVector delta = new IntVector("delta", allocator)) { + IntVector delta = new IntVector("delta", allocator)) { partialSum.allocateNew(PARTIAL_SUM_VECTOR_LENGTH); partialSum.setValueCount(PARTIAL_SUM_VECTOR_LENGTH); @@ -111,7 +108,8 @@ public void testFindPositionInPartialSumVector() { // search and verify results for (int i = 0; i < PARTIAL_SUM_VECTOR_LENGTH - 1; i++) { - assertEquals(i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1)); + assertEquals( + i, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase + 3 * i + 1)); } } } @@ -131,8 +129,10 @@ public void testFindPositionInPartialSumVectorNegative() { // search and 
verify results assertEquals(0, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase)); assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, sumBase - 1)); - assertEquals(-1, PartialSumUtils.findPositionInPartialSumVector(partialSum, - sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1))); + assertEquals( + -1, + PartialSumUtils.findPositionInPartialSumVector( + partialSum, sumBase + 3 * (PARTIAL_SUM_VECTOR_LENGTH - 1))); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java index 0e6627eb4822a..4b7c6a9756780 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/rank/TestVectorRank.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.rank; import static org.junit.Assert.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import java.nio.charset.StandardCharsets; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. - */ +/** Test cases for {@link org.apache.arrow.algorithm.rank.VectorRank}. 
*/ public class TestVectorRank { private BufferAllocator allocator; @@ -70,7 +66,7 @@ public void testFixedWidthRank() { vector.set(9, 6); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); assertEquals(7, rank.indexAtRank(vector, comparator, 0)); assertEquals(0, rank.indexAtRank(vector, comparator, 1)); assertEquals(6, rank.indexAtRank(vector, comparator, 2)); @@ -103,7 +99,7 @@ public void testVariableWidthRank() { vector.set(9, String.valueOf(6).getBytes(StandardCharsets.UTF_8)); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); assertEquals(7, rank.indexAtRank(vector, comparator, 0)); assertEquals(0, rank.indexAtRank(vector, comparator, 1)); @@ -137,11 +133,13 @@ public void testRankNegative() { vector.set(9, 6); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vector); + DefaultVectorComparators.createDefaultComparator(vector); - assertThrows(IllegalArgumentException.class, () -> { - rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1); - }); + assertThrows( + IllegalArgumentException.class, + () -> { + rank.indexAtRank(vector, comparator, VECTOR_LENGTH + 1); + }); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java index 9ccecfa84a73a..7ff86a743effd 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestParallelSearcher.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.search; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -26,7 +25,6 @@ import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -39,9 +37,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for {@link ParallelSearcher}. - */ +/** Test cases for {@link ParallelSearcher}. */ @RunWith(Parameterized.class) public class TestParallelSearcher { @@ -97,8 +93,10 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept keyVector.allocateNew(VECTOR_LENGTH); // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); + VectorValueComparator comparator = + comparatorType == ComparatorType.EqualityComparator + ? null + : DefaultVectorComparators.createDefaultComparator(targetVector); for (int i = 0; i < VECTOR_LENGTH; i++) { targetVector.set(i, i); @@ -107,9 +105,13 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept targetVector.setValueCount(VECTOR_LENGTH); keyVector.setValueCount(VECTOR_LENGTH); - ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount); + ParallelSearcher searcher = + new ParallelSearcher<>(targetVector, threadPool, threadCount); for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator); + int pos = + comparator == null + ? 
searcher.search(keyVector, i) + : searcher.search(keyVector, i, comparator); if (i * 2 < VECTOR_LENGTH) { assertEquals(i * 2, pos); } else { @@ -122,13 +124,15 @@ public void testParallelIntSearch() throws ExecutionException, InterruptedExcept @Test public void testParallelStringSearch() throws ExecutionException, InterruptedException { try (VarCharVector targetVector = new VarCharVector("targetVector", allocator); - VarCharVector keyVector = new VarCharVector("keyVector", allocator)) { + VarCharVector keyVector = new VarCharVector("keyVector", allocator)) { targetVector.allocateNew(VECTOR_LENGTH); keyVector.allocateNew(VECTOR_LENGTH); // if we are comparing elements using equality semantics, we do not need a comparator here. - VectorValueComparator comparator = comparatorType == ComparatorType.EqualityComparator ? null - : DefaultVectorComparators.createDefaultComparator(targetVector); + VectorValueComparator comparator = + comparatorType == ComparatorType.EqualityComparator + ? null + : DefaultVectorComparators.createDefaultComparator(targetVector); for (int i = 0; i < VECTOR_LENGTH; i++) { targetVector.setSafe(i, String.valueOf(i).getBytes(StandardCharsets.UTF_8)); @@ -137,9 +141,13 @@ public void testParallelStringSearch() throws ExecutionException, InterruptedExc targetVector.setValueCount(VECTOR_LENGTH); keyVector.setValueCount(VECTOR_LENGTH); - ParallelSearcher searcher = new ParallelSearcher<>(targetVector, threadPool, threadCount); + ParallelSearcher searcher = + new ParallelSearcher<>(targetVector, threadPool, threadCount); for (int i = 0; i < VECTOR_LENGTH; i++) { - int pos = comparator == null ? searcher.search(keyVector, i) : searcher.search(keyVector, i, comparator); + int pos = + comparator == null + ? 
searcher.search(keyVector, i) + : searcher.search(keyVector, i, comparator); if (i * 2 < VECTOR_LENGTH) { assertEquals(i * 2, pos); } else { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java index 18f4fa0355f4f..39f2f609f7df4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorRangeSearcher.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import static org.junit.Assert.assertEquals; import java.util.Arrays; import java.util.Collection; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -33,9 +31,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for {@link VectorRangeSearcher}. - */ +/** Test cases for {@link VectorRangeSearcher}. 
*/ @RunWith(Parameterized.class) public class TestVectorRangeSearcher { @@ -78,9 +74,11 @@ public void testGetLowerBounds() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { - int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat); + int result = + VectorRangeSearcher.getFirstMatch(intVector, comparator, intVector, i * repeat); assertEquals(i * ((long) repeat), result); } } @@ -112,7 +110,8 @@ public void testGetLowerBoundsNegative() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int result = VectorRangeSearcher.getFirstMatch(intVector, comparator, negVector, i); assertEquals(-1, result); @@ -141,7 +140,8 @@ public void testGetUpperBounds() { } // do search - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int result = VectorRangeSearcher.getLastMatch(intVector, comparator, intVector, i * repeat); assertEquals((i + 1) * repeat - 1, result); @@ -153,7 +153,7 @@ public void testGetUpperBounds() { public void testGetUpperBoundsNegative() { final int maxValue = 100; try (IntVector intVector = new IntVector("int vec", allocator); - IntVector negVector = new IntVector("neg vec", allocator)) { + IntVector negVector = new IntVector("neg vec", allocator)) { // allocate vector intVector.allocateNew(maxValue * repeat); intVector.setValueCount(maxValue * repeat); @@ -175,7 +175,8 @@ public void testGetUpperBoundsNegative() { } // do search - 
VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(intVector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(intVector); for (int i = 0; i < maxValue; i++) { int result = VectorRangeSearcher.getLastMatch(intVector, comparator, negVector, i); assertEquals(-1, result); @@ -185,11 +186,6 @@ public void testGetUpperBoundsNegative() { @Parameterized.Parameters(name = "repeat = {0}") public static Collection getRepeat() { - return Arrays.asList( - new Object[]{1}, - new Object[]{2}, - new Object[]{5}, - new Object[]{10} - ); + return Arrays.asList(new Object[] {1}, new Object[] {2}, new Object[] {5}, new Object[] {10}); } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java index 32fa10bbd98d0..629d900b479b6 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/search/TestVectorSearcher.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.search; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; import static org.junit.Assert.assertEquals; import java.nio.charset.StandardCharsets; - import org.apache.arrow.algorithm.sort.DefaultVectorComparators; import org.apache.arrow.algorithm.sort.VectorValueComparator; import org.apache.arrow.memory.BufferAllocator; @@ -37,9 +35,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. - */ +/** Test cases for {@link org.apache.arrow.algorithm.search.VectorSearcher}. 
*/ public class TestVectorSearcher { private final int VECTOR_LENGTH = 100; @@ -59,7 +55,7 @@ public void shutdown() { @Test public void testBinarySearchInt() { try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { + IntVector negVector = new IntVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(1); @@ -77,7 +73,7 @@ public void testBinarySearchInt() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -91,7 +87,7 @@ public void testBinarySearchInt() { @Test public void testLinearSearchInt() { try (IntVector rawVector = new IntVector("", allocator); - IntVector negVector = new IntVector("", allocator)) { + IntVector negVector = new IntVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(1); @@ -109,7 +105,7 @@ public void testLinearSearchInt() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -123,7 +119,7 @@ public void testLinearSearchInt() { @Test public void testBinarySearchVarChar() { try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { + VarCharVector negVector = new VarCharVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(VECTOR_LENGTH, 1); @@ -148,7 
+144,7 @@ public void testBinarySearchVarChar() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -162,7 +158,7 @@ public void testBinarySearchVarChar() { @Test public void testLinearSearchVarChar() { try (VarCharVector rawVector = new VarCharVector("", allocator); - VarCharVector negVector = new VarCharVector("", allocator)) { + VarCharVector negVector = new VarCharVector("", allocator)) { rawVector.allocateNew(VECTOR_LENGTH * 16, VECTOR_LENGTH); rawVector.setValueCount(VECTOR_LENGTH); negVector.allocateNew(VECTOR_LENGTH, 1); @@ -187,7 +183,7 @@ public void testLinearSearchVarChar() { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < VECTOR_LENGTH; i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -260,11 +256,11 @@ private ListVector createNegativeListVector() { @Test public void testBinarySearchList() { try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { + ListVector negVector = createNegativeListVector()) { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < rawVector.getValueCount(); i++) { int result = VectorSearcher.binarySearch(rawVector, comparator, rawVector, i); assertEquals(i, result); @@ -281,11 +277,11 @@ public void testBinarySearchList() { @Test public void testLinearSearchList() { try (ListVector rawVector = createListVector(); - ListVector negVector = createNegativeListVector()) { + 
ListVector negVector = createNegativeListVector()) { // do search VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(rawVector); + DefaultVectorComparators.createDefaultComparator(rawVector); for (int i = 0; i < rawVector.getValueCount(); i++) { int result = VectorSearcher.linearSearch(rawVector, comparator, rawVector, i); assertEquals(i, result); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java index 9624432924b5a..21f6c0217c376 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestCompositeVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -33,9 +31,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link CompositeVectorComparator}. - */ +/** Test cases for {@link CompositeVectorComparator}. 
*/ public class TestCompositeVectorComparator { private BufferAllocator allocator; @@ -60,7 +56,7 @@ public void testCompareVectorSchemaRoot() { VarCharVector strVec2 = new VarCharVector("str2", allocator); try (VectorSchemaRoot batch1 = new VectorSchemaRoot(Arrays.asList(intVec1, strVec1)); - VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) { + VectorSchemaRoot batch2 = new VectorSchemaRoot(Arrays.asList(intVec2, strVec2))) { intVec1.allocateNew(vectorLength); strVec1.allocateNew(vectorLength * 10, vectorLength); @@ -75,15 +71,15 @@ public void testCompareVectorSchemaRoot() { } VectorValueComparator innerComparator1 = - DefaultVectorComparators.createDefaultComparator(intVec1); + DefaultVectorComparators.createDefaultComparator(intVec1); innerComparator1.attachVectors(intVec1, intVec2); VectorValueComparator innerComparator2 = - DefaultVectorComparators.createDefaultComparator(strVec1); + DefaultVectorComparators.createDefaultComparator(strVec1); innerComparator2.attachVectors(strVec1, strVec2); - VectorValueComparator comparator = new CompositeVectorComparator( - new VectorValueComparator[]{innerComparator1, innerComparator2} - ); + VectorValueComparator comparator = + new CompositeVectorComparator( + new VectorValueComparator[] {innerComparator1, innerComparator2}); // verify results diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java index c40854fb17410..f1b3d6fb5aa1d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestDefaultVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.sort; import static org.apache.arrow.vector.complex.BaseRepeatedValueVector.OFFSET_WIDTH; @@ -67,9 +66,7 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; -/** - * Test cases for {@link DefaultVectorComparators}. - */ +/** Test cases for {@link DefaultVectorComparators}. */ public class TestDefaultVectorComparator { private BufferAllocator allocator; @@ -111,9 +108,9 @@ private ListVector createListVector(int count) { @Test public void testCompareLists() { try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // prefix is smaller @@ -121,11 +118,11 @@ public void testCompareLists() { } try (ListVector listVector1 = createListVector(11); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { ((IntVector) listVector2.getDataVector()).set(10, 110); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // breaking tie by the last element @@ -133,10 +130,10 @@ public void testCompareLists() { } try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(10)) { + ListVector listVector2 = createListVector(10)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); // list vector elements equal @@ -149,9 +146,9 @@ public void testCopiedComparatorForLists() { for (int i = 1; i < 10; i++) { for (int j = 1; 
j < 10; j++) { try (ListVector listVector1 = createListVector(10); - ListVector listVector2 = createListVector(11)) { + ListVector listVector2 = createListVector(11)) { VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(listVector1); + DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); VectorValueComparator copyComparator = comparator.createNew(); @@ -185,7 +182,7 @@ private FixedSizeListVector createFixedSizeListVector(int count) { @Test public void testCompareFixedSizeLists() { try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(listVector1); comparator.attachVectors(listVector1, listVector2); @@ -195,7 +192,7 @@ public void testCompareFixedSizeLists() { } try (FixedSizeListVector listVector1 = createFixedSizeListVector(11); - FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(11)) { ((IntVector) listVector2.getDataVector()).set(10, 110); VectorValueComparator comparator = @@ -207,7 +204,7 @@ public void testCompareFixedSizeLists() { } try (FixedSizeListVector listVector1 = createFixedSizeListVector(10); - FixedSizeListVector listVector2 = createFixedSizeListVector(10)) { + FixedSizeListVector listVector2 = createFixedSizeListVector(10)) { VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(listVector1); @@ -236,7 +233,7 @@ public void testCompareUInt1() { vec.set(9, Byte.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -259,14 +256,21 @@ 
public void testCompareUInt2() { vec.allocateNew(10); ValueVectorDataPopulator.setVector( - vec, null, (char) (Character.MAX_VALUE - 1), Character.MAX_VALUE, (char) 0, (char) 1, - (char) 2, (char) (Character.MAX_VALUE - 1), null, + vec, + null, + (char) (Character.MAX_VALUE - 1), + Character.MAX_VALUE, + (char) 0, + (char) 1, + (char) 2, + (char) (Character.MAX_VALUE - 1), + null, '\u7FFF', // value for the max 16-byte signed integer '\u8000' // value for the min 16-byte signed integer - ); + ); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -301,7 +305,7 @@ public void testCompareUInt4() { vec.set(9, Integer.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -336,7 +340,7 @@ public void testCompareUInt8() { vec.set(9, Long.MIN_VALUE); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.compare(0, 1) < 0); @@ -358,7 +362,16 @@ public void testCompareFloat4() { try (Float4Vector vec = new Float4Vector("", allocator)) { vec.allocateNew(9); ValueVectorDataPopulator.setVector( - vec, -1.1f, 0.0f, 1.0f, null, 1.0f, 2.0f, Float.NaN, Float.NaN, Float.POSITIVE_INFINITY, + vec, + -1.1f, + 0.0f, + 1.0f, + null, + 1.0f, + 2.0f, + Float.NaN, + Float.NaN, + Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY); VectorValueComparator comparator = @@ -393,7 +406,16 @@ public void testCompareFloat8() { try (Float8Vector vec = new Float8Vector("", allocator)) { vec.allocateNew(9); ValueVectorDataPopulator.setVector( - vec, -1.1, 0.0, 1.0, null, 1.0, 2.0, Double.NaN, Double.NaN, 
Double.POSITIVE_INFINITY, + vec, + -1.1, + 0.0, + 1.0, + null, + 1.0, + 2.0, + Double.NaN, + Double.NaN, + Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY); VectorValueComparator comparator = @@ -488,8 +510,15 @@ public void testCompareShort() { try (SmallIntVector vec = new SmallIntVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( - vec, (short) -1, (short) 0, (short) 1, null, (short) 1, (short) 5, - (short) (Short.MIN_VALUE + 1), Short.MAX_VALUE); + vec, + (short) -1, + (short) 0, + (short) 1, + null, + (short) 1, + (short) 5, + (short) (Short.MIN_VALUE + 1), + Short.MAX_VALUE); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -519,8 +548,15 @@ public void testCompareByte() { try (TinyIntVector vec = new TinyIntVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( - vec, (byte) -1, (byte) 0, (byte) 1, null, (byte) 1, (byte) 5, - (byte) (Byte.MIN_VALUE + 1), Byte.MAX_VALUE); + vec, + (byte) -1, + (byte) 0, + (byte) 1, + null, + (byte) 1, + (byte) 5, + (byte) (Byte.MIN_VALUE + 1), + Byte.MAX_VALUE); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -549,8 +585,7 @@ public void testCompareByte() { public void testCompareBit() { try (BitVector vec = new BitVector("", allocator)) { vec.allocateNew(6); - ValueVectorDataPopulator.setVector( - vec, 1, 2, 0, 0, -1, null); + ValueVectorDataPopulator.setVector(vec, 1, 2, 0, 0, -1, null); VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); @@ -691,7 +726,8 @@ public void testCompareDecimal256() { @Test public void testCompareDuration() { try (DurationVector vec = - new DurationVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + new DurationVector( + "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 
1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -722,7 +758,8 @@ public void testCompareDuration() { @Test public void testCompareIntervalDay() { try (IntervalDayVector vec = - new IntervalDayVector("", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { + new IntervalDayVector( + "", FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), allocator)) { vec.allocateNew(8); vec.set(0, -1, 0); vec.set(1, 0, 0); @@ -755,8 +792,7 @@ public void testCompareIntervalDay() { @Test public void testCompareTimeMicro() { - try (TimeMicroVector vec = - new TimeMicroVector("", allocator)) { + try (TimeMicroVector vec = new TimeMicroVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -816,8 +852,7 @@ public void testCompareTimeMilli() { @Test public void testCompareTimeNano() { - try (TimeNanoVector vec = - new TimeNanoVector("", allocator)) { + try (TimeNanoVector vec = new TimeNanoVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -877,8 +912,7 @@ public void testCompareTimeSec() { @Test public void testCompareTimeStamp() { - try (TimeStampMilliVector vec = - new TimeStampMilliVector("", allocator)) { + try (TimeStampMilliVector vec = new TimeStampMilliVector("", allocator)) { vec.allocateNew(8); ValueVectorDataPopulator.setVector( vec, -1L, 0L, 1L, null, 1L, 5L, Long.MIN_VALUE + 1L, Long.MAX_VALUE); @@ -909,7 +943,7 @@ public void testCompareTimeStamp() { @Test public void testCompareFixedSizeBinary() { try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 2); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] 
{1, 1}); @@ -923,7 +957,7 @@ public void testCompareFixedSizeBinary() { } try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1, 0}); @@ -937,7 +971,7 @@ public void testCompareFixedSizeBinary() { } try (FixedSizeBinaryVector vector1 = new FixedSizeBinaryVector("test1", allocator, 3); - FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { + FixedSizeBinaryVector vector2 = new FixedSizeBinaryVector("test1", allocator, 3)) { vector1.allocateNew(); vector2.allocateNew(); vector1.set(0, new byte[] {1, 1, 1}); @@ -953,8 +987,8 @@ public void testCompareFixedSizeBinary() { @Test public void testCompareNull() { - try (NullVector vec = new NullVector("test", - FieldType.notNullable(new ArrowType.Int(32, false)))) { + try (NullVector vec = + new NullVector("test", FieldType.notNullable(new ArrowType.Int(32, false)))) { vec.setValueCount(2); VectorValueComparator comparator = @@ -967,12 +1001,14 @@ public void testCompareNull() { @Test public void testCheckNullsOnCompareIsFalseForNonNullableVector() { - try (IntVector vec = new IntVector("not nullable", - FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { + try (IntVector vec = + new IntVector( + "not nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertFalse(comparator.checkNullsOnCompare()); @@ -981,16 +1017,17 @@ public void testCheckNullsOnCompareIsFalseForNonNullableVector() { @Test public 
void testCheckNullsOnCompareIsTrueForNullableVector() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("not-nullable", FieldType.notNullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "not-nullable", FieldType.notNullable(new ArrowType.Int(32, false)), allocator)) { ValueVectorDataPopulator.setVector(vec, 1, null, 3, 4); ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertTrue(comparator.checkNullsOnCompare()); @@ -1001,17 +1038,18 @@ public void testCheckNullsOnCompareIsTrueForNullableVector() { @Test public void testCheckNullsOnCompareIsFalseWithNoNulls() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "also-nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator)) { // no null values ValueVectorDataPopulator.setVector(vec, 1, 2, 3, 4); ValueVectorDataPopulator.setVector(vec2, 1, 2, 3, 4); - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec); assertFalse(comparator.checkNullsOnCompare()); @@ -1022,13 +1060,14 @@ public void 
testCheckNullsOnCompareIsFalseWithNoNulls() { @Test public void testCheckNullsOnCompareIsTrueWithEmptyVectors() { - try (IntVector vec = new IntVector("nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator); - IntVector vec2 = new IntVector("also-nullable", FieldType.nullable( - new ArrowType.Int(32, false)), allocator) - ) { + try (IntVector vec = + new IntVector("nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator); + IntVector vec2 = + new IntVector( + "also-nullable", FieldType.nullable(new ArrowType.Int(32, false)), allocator)) { - final VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + final VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); comparator.attachVector(vec2); assertTrue(comparator.checkNullsOnCompare()); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java index 91ef52017df4d..ed5aadfcda04c 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthInPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.util.stream.IntStream; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.IntVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link FixedWidthInPlaceVectorSorter}. - */ +/** Test cases for {@link FixedWidthInPlaceVectorSorter}. 
*/ public class TestFixedWidthInPlaceVectorSorter { private BufferAllocator allocator; @@ -69,7 +65,8 @@ public void testSortInt() { // sort the vector FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); sorter.sortInPlace(vec, comparator); @@ -90,8 +87,8 @@ public void testSortInt() { } /** - * Tests the worst case for quick sort. - * It may cause stack overflow if the algorithm is implemented as a recursive algorithm. + * Tests the worst case for quick sort. It may cause stack overflow if the algorithm is + * implemented as a recursive algorithm. */ @Test public void testSortLargeIncreasingInt() { @@ -107,7 +104,8 @@ public void testSortLargeIncreasingInt() { // sort the vector FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); sorter.sortInPlace(vec, comparator); @@ -133,7 +131,8 @@ public void testChoosePivot() { vec.setValueCount(vectorLength); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { // setup internal data structures @@ -164,16 +163,15 @@ public void testChoosePivot() { } } - /** - * Evaluates choosing pivot for all possible permutations of 3 numbers. - */ + /** Evaluates choosing pivot for all possible permutations of 3 numbers. 
*/ @Test public void testChoosePivotAllPermutes() { try (IntVector vec = new IntVector("", allocator)) { vec.allocateNew(3); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); try (IntVector pivotBuffer = (IntVector) vec.getField().createVector(allocator)) { // setup internal data structures @@ -216,25 +214,25 @@ public void testChoosePivotAllPermutes() { @Test public void testSortInt2() { try (IntVector vector = new IntVector("vector", allocator)) { - ValueVectorDataPopulator.setVector(vector, - 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, - 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, - 8, 9, 10, 11, 36, 37, 38, 39, 40, 41, - 66, 67, 68, 69, 70, 71); + ValueVectorDataPopulator.setVector( + vector, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, + 11, 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); sorter.sortInPlace(vector, comparator); int[] actual = new int[vector.getValueCount()]; - IntStream.range(0, vector.getValueCount()).forEach( - i -> actual[i] = vector.get(i)); + IntStream.range(0, vector.getValueCount()).forEach(i -> actual[i] = vector.get(i)); assertArrayEquals( - new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual); + new int[] { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 + }, + actual); } } } diff --git 
a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java index cc13e7f8ceaee..4096897c20a05 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthOutOfPlaceVectorSorter.java @@ -14,14 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertTrue; import static org.junit.jupiter.api.Assertions.assertArrayEquals; import java.util.stream.IntStream; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -37,9 +35,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link FixedWidthOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link FixedWidthOutOfPlaceVectorSorter}. */ public class TestFixedWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -49,7 +45,9 @@ public TestFixedWidthOutOfPlaceVectorSorter(boolean generalSorter) { } OutOfPlaceVectorSorter getSorter() { - return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new FixedWidthOutOfPlaceVectorSorter<>(); + return generalSorter + ? 
new GeneralOutOfPlaceVectorSorter<>() + : new FixedWidthOutOfPlaceVectorSorter<>(); } @Before @@ -82,10 +80,11 @@ public void testSortByte() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); TinyIntVector sortedVec = - (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (TinyIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -129,10 +128,11 @@ public void testSortShort() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); SmallIntVector sortedVec = - (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (SmallIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -176,9 +176,11 @@ public void testSortInt() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + IntVector sortedVec = + (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -222,9 +224,11 @@ public void testSortLong() { // sort the vector 
OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - BigIntVector sortedVec = (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + BigIntVector sortedVec = + (BigIntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -268,9 +272,11 @@ public void testSortFloat() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - Float4Vector sortedVec = (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + Float4Vector sortedVec = + (Float4Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -314,9 +320,11 @@ public void testSortDouble() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - Float8Vector sortedVec = (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + Float8Vector sortedVec = + (Float8Vector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -343,17 +351,17 @@ public void testSortDouble() { @Test public void testSortInt2() { try (IntVector vec = new IntVector("", allocator)) { - ValueVectorDataPopulator.setVector(vec, - 0, 1, 2, 
3, 4, 5, 30, 31, 32, 33, - 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, - 8, 9, 10, 11, 36, 37, 38, 39, 40, 41, - 66, 67, 68, 69, 70, 71); + ValueVectorDataPopulator.setVector( + vec, 0, 1, 2, 3, 4, 5, 30, 31, 32, 33, 34, 35, 60, 61, 62, 63, 64, 65, 6, 7, 8, 9, 10, 11, + 36, 37, 38, 39, 40, 41, 66, 67, 68, 69, 70, 71); // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); - try (IntVector sortedVec = (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (IntVector sortedVec = + (IntVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vec.getValueCount()); sortedVec.setValueCount(vec.getValueCount()); @@ -361,13 +369,14 @@ public void testSortInt2() { // verify results int[] actual = new int[sortedVec.getValueCount()]; - IntStream.range(0, sortedVec.getValueCount()).forEach( - i -> actual[i] = sortedVec.get(i)); + IntStream.range(0, sortedVec.getValueCount()).forEach(i -> actual[i] = sortedVec.get(i)); assertArrayEquals( - new int[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, - 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, - 40, 41, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}, actual); + new int[] { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71 + }, + actual); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java index 80c72b4e21a27..a92cc77818f4a 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestFixedWidthSorting.java @@ -14,14 +14,12 @@ * See the 
License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseFixedWidthVector; @@ -37,9 +35,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test sorting fixed width vectors with random data. - */ +/** Test sorting fixed width vectors with random data. */ @RunWith(Parameterized.class) public class TestFixedWidthSorting> { @@ -70,8 +66,12 @@ public void shutdown() { } public TestFixedWidthSorting( - int length, double nullFraction, boolean inPlace, String desc, - Function vectorGenerator, TestSortingUtil.DataGenerator dataGenerator) { + int length, + double nullFraction, + boolean inPlace, + String desc, + Function vectorGenerator, + TestSortingUtil.DataGenerator dataGenerator) { this.length = length; this.nullFraction = nullFraction; this.inPlace = inPlace; @@ -94,7 +94,8 @@ void sortInPlace() { TestSortingUtil.sortArray(array); FixedWidthInPlaceVectorSorter sorter = new FixedWidthInPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); sorter.sortInPlace(vector, comparator); @@ -109,9 +110,11 @@ void sortOutOfPlace() { // sort the vector FixedWidthOutOfPlaceVectorSorter sorter = new FixedWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); - try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (V sortedVec = + (V) 
vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vector.getValueCount()); sortedVec.setValueCount(vector.getValueCount()); @@ -123,47 +126,78 @@ void sortOutOfPlace() { } } - @Parameterized.Parameters(name = "length = {0}, null fraction = {1}, in place = {2}, vector = {3}") + @Parameterized.Parameters( + name = "length = {0}, null fraction = {1}, in place = {2}, vector = {3}") public static Collection getParameters() { List params = new ArrayList<>(); for (int length : VECTOR_LENGTHS) { for (double nullFrac : NULL_FRACTIONS) { for (boolean inPlace : new boolean[] {true, false}) { - params.add(new Object[] { - length, nullFrac, inPlace, "TinyIntVector", - (Function) allocator -> new TinyIntVector("vector", allocator), - TestSortingUtil.TINY_INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "SmallIntVector", - (Function) allocator -> new SmallIntVector("vector", allocator), - TestSortingUtil.SMALL_INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "IntVector", - (Function) allocator -> new IntVector("vector", allocator), - TestSortingUtil.INT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "BigIntVector", - (Function) allocator -> new BigIntVector("vector", allocator), - TestSortingUtil.LONG_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "Float4Vector", - (Function) allocator -> new Float4Vector("vector", allocator), - TestSortingUtil.FLOAT_GENERATOR - }); - - params.add(new Object[] { - length, nullFrac, inPlace, "Float8Vector", - (Function) allocator -> new Float8Vector("vector", allocator), - TestSortingUtil.DOUBLE_GENERATOR - }); + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "TinyIntVector", + (Function) + allocator -> new TinyIntVector("vector", allocator), + TestSortingUtil.TINY_INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + 
"SmallIntVector", + (Function) + allocator -> new SmallIntVector("vector", allocator), + TestSortingUtil.SMALL_INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "IntVector", + (Function) + allocator -> new IntVector("vector", allocator), + TestSortingUtil.INT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "BigIntVector", + (Function) + allocator -> new BigIntVector("vector", allocator), + TestSortingUtil.LONG_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "Float4Vector", + (Function) + allocator -> new Float4Vector("vector", allocator), + TestSortingUtil.FLOAT_GENERATOR + }); + + params.add( + new Object[] { + length, + nullFrac, + inPlace, + "Float8Vector", + (Function) + allocator -> new Float8Vector("vector", allocator), + TestSortingUtil.DOUBLE_GENERATOR + }); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java index 07a6b545ddaa2..9e796a98ab790 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestGeneralOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -30,9 +29,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link GeneralOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link GeneralOutOfPlaceVectorSorter}. 
*/ public class TestGeneralOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -49,30 +46,33 @@ public void shutdown() { VectorValueComparator getComparator(StructVector structVector) { IntVector child0 = structVector.getChild("column0", IntVector.class); - VectorValueComparator childComp0 = DefaultVectorComparators.createDefaultComparator(child0); + VectorValueComparator childComp0 = + DefaultVectorComparators.createDefaultComparator(child0); childComp0.attachVector(child0); IntVector child1 = structVector.getChild("column1", IntVector.class); - VectorValueComparator childComp1 = DefaultVectorComparators.createDefaultComparator(child1); + VectorValueComparator childComp1 = + DefaultVectorComparators.createDefaultComparator(child1); childComp1.attachVector(child1); - VectorValueComparator comp = new VectorValueComparator() { - - @Override - public int compareNotNull(int index1, int index2) { - // compare values by lexicographic order - int result0 = childComp0.compare(index1, index2); - if (result0 != 0) { - return result0; - } - return childComp1.compare(index1, index2); - } - - @Override - public VectorValueComparator createNew() { - return this; - } - }; + VectorValueComparator comp = + new VectorValueComparator() { + + @Override + public int compareNotNull(int index1, int index2) { + // compare values by lexicographic order + int result0 = childComp0.compare(index1, index2); + if (result0 != 0) { + return result0; + } + return childComp1.compare(index1, index2); + } + + @Override + public VectorValueComparator createNew() { + return this; + } + }; return comp; } @@ -81,17 +81,21 @@ public VectorValueComparator createNew() { public void testSortStructVector() { final int vectorLength = 7; try (StructVector srcVector = StructVector.empty("src struct", allocator); - StructVector dstVector = StructVector.empty("dst struct", allocator)) { + StructVector dstVector = StructVector.empty("dst struct", allocator)) { IntVector srcChild0 = - 
srcVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + srcVector.addOrGet( + "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector srcChild1 = - srcVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + srcVector.addOrGet( + "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector dstChild0 = - dstVector.addOrGet("column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + dstVector.addOrGet( + "column0", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); IntVector dstChild1 = - dstVector.addOrGet("column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); + dstVector.addOrGet( + "column1", FieldType.nullable(new ArrowType.Int(32, true)), IntVector.class); // src struct vector values: // [ @@ -128,15 +132,16 @@ public void testSortStructVector() { // validate results assertEquals(vectorLength, dstVector.getValueCount()); assertEquals( - "[" + - "null, " + - "{\"column1\":3}, " + - "{\"column0\":2,\"column1\":1}, " + - "{\"column0\":3,\"column1\":4}, " + - "{\"column0\":5,\"column1\":4}, " + - "{\"column0\":6,\"column1\":6}, " + - "{\"column0\":7}" + - "]", dstVector.toString()); + "[" + + "null, " + + "{\"column1\":3}, " + + "{\"column0\":2,\"column1\":1}, " + + "{\"column0\":3,\"column1\":4}, " + + "{\"column0\":5,\"column1\":4}, " + + "{\"column0\":6,\"column1\":6}, " + + "{\"column0\":7}" + + "]", + dstVector.toString()); } } } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java index 99e22f8bdcd5c..bc8aac08b61e4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestIndexSorter.java @@ -14,7 +14,6 @@ * See the License for the 
specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -28,9 +27,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link IndexSorter}. - */ +/** Test cases for {@link IndexSorter}. */ public class TestIndexSorter { private BufferAllocator allocator; @@ -56,14 +53,15 @@ public void testIndexSort() { // sort the index IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator(); + DefaultVectorComparators.IntComparator intComparator = + new DefaultVectorComparators.IntComparator(); intComparator.attachVector(vec); IntVector indices = new IntVector("", allocator); indices.setValueCount(10); indexSorter.sort(vec, indices, intComparator); - int[] expected = new int[]{6, 9, 1, 3, 0, 4, 5, 7, 2, 8}; + int[] expected = new int[] {6, 9, 1, 3, 0, 4, 5, 7, 2, 8}; for (int i = 0; i < expected.length; i++) { assertTrue(!indices.isNull(i)); @@ -74,8 +72,8 @@ public void testIndexSort() { } /** - * Tests the worst case for quick sort. - * It may cause stack overflow if the algorithm is implemented as a recursive algorithm. + * Tests the worst case for quick sort. It may cause stack overflow if the algorithm is + * implemented as a recursive algorithm. 
*/ @Test public void testSortLargeIncreasingInt() { @@ -91,7 +89,8 @@ public void testSortLargeIncreasingInt() { // sort the vector IndexSorter indexSorter = new IndexSorter<>(); - DefaultVectorComparators.IntComparator intComparator = new DefaultVectorComparators.IntComparator(); + DefaultVectorComparators.IntComparator intComparator = + new DefaultVectorComparators.IntComparator(); intComparator.attachVector(vec); try (IntVector indices = new IntVector("", allocator)) { @@ -110,7 +109,7 @@ public void testSortLargeIncreasingInt() { public void testChoosePivot() { final int vectorLength = 100; try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { vec.allocateNew(vectorLength); indices.allocateNew(vectorLength); @@ -122,7 +121,8 @@ public void testChoosePivot() { vec.setValueCount(vectorLength); indices.setValueCount(vectorLength); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); // setup internal data structures comparator.attachVector(vec); @@ -147,17 +147,16 @@ public void testChoosePivot() { } } - /** - * Evaluates choosing pivot for all possible permutations of 3 numbers. - */ + /** Evaluates choosing pivot for all possible permutations of 3 numbers. 
*/ @Test public void testChoosePivotAllPermutes() { try (IntVector vec = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { vec.allocateNew(); indices.allocateNew(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vec); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vec); // setup internal data structures comparator.attachVector(vec); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java index ba9c42913c0d9..3b16ac30d4ff4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestInsertionSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertFalse; @@ -28,9 +27,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link InsertionSorter}. - */ +/** Test cases for {@link InsertionSorter}. 
*/ public class TestInsertionSorter { private BufferAllocator allocator; @@ -49,7 +46,7 @@ public void shutdown() { private void testSortIntVectorRange(int start, int end, int[] expected) { try (IntVector vector = new IntVector("vector", allocator); - IntVector buffer = new IntVector("buffer", allocator)) { + IntVector buffer = new IntVector("buffer", allocator)) { buffer.allocateNew(1); @@ -81,7 +78,7 @@ public void testSortIntVector() { private void testSortIndicesRange(int start, int end, int[] expectedIndices) { try (IntVector vector = new IntVector("vector", allocator); - IntVector indices = new IntVector("indices", allocator)) { + IntVector indices = new IntVector("indices", allocator)) { ValueVectorDataPopulator.setVector(vector, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); ValueVectorDataPopulator.setVector(indices, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java index 321ca226d7e1d..025576f08e248 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOffHeapIntStack.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static junit.framework.TestCase.assertEquals; @@ -26,9 +25,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link OffHeapIntStack}. - */ +/** Test cases for {@link OffHeapIntStack}. 
*/ public class TestOffHeapIntStack { private BufferAllocator allocator; diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java index 66b75cbccac3e..4f6a8489c43ea 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestOutOfPlaceVectorSorter.java @@ -14,19 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import java.util.ArrayList; import java.util.Collection; import java.util.List; - import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test cases for out-of-place sorters. - */ +/** Test cases for out-of-place sorters. */ @RunWith(Parameterized.class) public abstract class TestOutOfPlaceVectorSorter { diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java index e22b22d4e6757..24b2c752d0863 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestSortingUtil.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertEquals; @@ -25,7 +24,6 @@ import java.util.Random; import java.util.function.BiConsumer; import java.util.function.Supplier; - import org.apache.arrow.vector.BigIntVector; import org.apache.arrow.vector.Float4Vector; import org.apache.arrow.vector.Float8Vector; @@ -37,50 +35,59 @@ import org.apache.arrow.vector.testing.RandomDataGenerator; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; -/** - * Utilities for sorting related utilities. - */ +/** Utilities for sorting related utilities. */ public class TestSortingUtil { static final Random random = new Random(0); - static final DataGenerator TINY_INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.TINY_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Byte.class); - - static final DataGenerator SMALL_INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.SMALL_INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Short.class); - - static final DataGenerator INT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.INT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Integer.class); - - static final DataGenerator LONG_GENERATOR = new DataGenerator<>( - RandomDataGenerator.LONG_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Long.class); - - static final DataGenerator FLOAT_GENERATOR = new DataGenerator<>( - RandomDataGenerator.FLOAT_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Float.class); - - static final DataGenerator DOUBLE_GENERATOR = new DataGenerator<>( - RandomDataGenerator.DOUBLE_GENERATOR, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), Double.class); - - static final DataGenerator STRING_GENERATOR = new DataGenerator<>( - () -> { - int strLength = random.nextInt(20) + 1; - return 
generateRandomString(strLength); - }, - (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), String.class); - - private TestSortingUtil() { - } - - /** - * Verify that a vector is equal to an array. - */ + static final DataGenerator TINY_INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.TINY_INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Byte.class); + + static final DataGenerator SMALL_INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.SMALL_INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Short.class); + + static final DataGenerator INT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.INT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Integer.class); + + static final DataGenerator LONG_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.LONG_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Long.class); + + static final DataGenerator FLOAT_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.FLOAT_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Float.class); + + static final DataGenerator DOUBLE_GENERATOR = + new DataGenerator<>( + RandomDataGenerator.DOUBLE_GENERATOR, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + Double.class); + + static final DataGenerator STRING_GENERATOR = + new DataGenerator<>( + () -> { + int strLength = random.nextInt(20) + 1; + return generateRandomString(strLength); + }, + (vector, array) -> ValueVectorDataPopulator.setVector(vector, array), + String.class); + + private TestSortingUtil() {} + + /** Verify that a vector is equal to an array. 
*/ public static void verifyResults(V vector, U[] expected) { assertEquals(vector.getValueCount(), expected.length); for (int i = 0; i < expected.length; i++) { @@ -88,30 +95,28 @@ public static void verifyResults(V vector, U[] expect } } - /** - * Sort an array with null values come first. - */ + /** Sort an array with null values come first. */ public static > void sortArray(U[] array) { - Arrays.sort(array, (a, b) -> { - if (a == null || b == null) { - if (a == null && b == null) { - return 0; - } - - // exactly one is null - if (a == null) { - return -1; - } else { - return 1; - } - } - return a.compareTo(b); - }); + Arrays.sort( + array, + (a, b) -> { + if (a == null || b == null) { + if (a == null && b == null) { + return 0; + } + + // exactly one is null + if (a == null) { + return -1; + } else { + return 1; + } + } + return a.compareTo(b); + }); } - /** - * Generate a string with alphabetic characters only. - */ + /** Generate a string with alphabetic characters only. */ static String generateRandomString(int length) { byte[] str = new byte[length]; final int lower = 'a'; @@ -128,6 +133,7 @@ static String generateRandomString(int length) { /** * Utility to generate data for testing. + * * @param vector type. * @param data element type. */ @@ -139,8 +145,7 @@ static class DataGenerator> { final Class clazz; - DataGenerator( - Supplier dataGenerator, BiConsumer vectorPopulator, Class clazz) { + DataGenerator(Supplier dataGenerator, BiConsumer vectorPopulator, Class clazz) { this.dataGenerator = dataGenerator; this.vectorPopulator = vectorPopulator; this.clazz = clazz; @@ -148,6 +153,7 @@ static class DataGenerator> { /** * Populate the vector according to the specified parameters. + * * @param vector the vector to populate. * @param length vector length. * @param nullFraction the fraction of null values. 
diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java index f2de5d23fce89..ce15940c1df3d 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestStableVectorComparator.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.VarCharVector; @@ -31,9 +29,7 @@ import org.junit.Test; import org.junit.jupiter.api.Assertions; -/** - * Test cases for {@link StableVectorComparator}. - */ +/** Test cases for {@link StableVectorComparator}. 
*/ public class TestStableVectorComparator { private BufferAllocator allocator; @@ -62,7 +58,8 @@ public void testCompare() { vec.set(4, "a".getBytes(StandardCharsets.UTF_8)); VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = new StableVectorComparator<>(comparator); + VectorValueComparator stableComparator = + new StableVectorComparator<>(comparator); stableComparator.attachVector(vec); assertTrue(stableComparator.compare(0, 1) > 0); @@ -95,10 +92,12 @@ public void testStableSortString() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); VectorValueComparator comparator = new TestVarCharSorter(); - VectorValueComparator stableComparator = new StableVectorComparator<>(comparator); + VectorValueComparator stableComparator = + new StableVectorComparator<>(comparator); try (VarCharVector sortedVec = - (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { + (VarCharVector) + vec.getField().getFieldType().createNewSingleVector("", allocator, null)) { sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); sortedVec.setLastSet(vec.getValueCount() - 1); sortedVec.setValueCount(vec.getValueCount()); @@ -107,23 +106,32 @@ public void testStableSortString() { // verify results // the results are stable - assertEquals("0", new String(Objects.requireNonNull(sortedVec.get(0)), StandardCharsets.UTF_8)); - assertEquals("01", new String(Objects.requireNonNull(sortedVec.get(1)), StandardCharsets.UTF_8)); - assertEquals("0c", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals("a", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals("aa", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals("a1", new 
String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals("abcdefg", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals("accc", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals("afds", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); + assertEquals( + "0", new String(Objects.requireNonNull(sortedVec.get(0)), StandardCharsets.UTF_8)); + assertEquals( + "01", new String(Objects.requireNonNull(sortedVec.get(1)), StandardCharsets.UTF_8)); + assertEquals( + "0c", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "a", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "aa", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "a1", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "abcdefg", + new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); + assertEquals( + "accc", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); + assertEquals( + "afds", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); } } } - /** - * Utility comparator that compares varchars by the first character. - */ + /** Utility comparator that compares varchars by the first character. 
*/ private static class TestVarCharSorter extends VectorValueComparator { @Override diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java index 2486034f1fa32..b3f2539fa53c2 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthOutOfPlaceVectorSorter.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.Assert.assertEquals; @@ -22,7 +21,6 @@ import java.nio.charset.StandardCharsets; import java.util.Objects; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseVariableWidthVector; @@ -32,9 +30,7 @@ import org.junit.Before; import org.junit.Test; -/** - * Test cases for {@link VariableWidthOutOfPlaceVectorSorter}. - */ +/** Test cases for {@link VariableWidthOutOfPlaceVectorSorter}. */ public class TestVariableWidthOutOfPlaceVectorSorter extends TestOutOfPlaceVectorSorter { private BufferAllocator allocator; @@ -44,10 +40,11 @@ public TestVariableWidthOutOfPlaceVectorSorter(boolean generalSorter) { } OutOfPlaceVectorSorter getSorter() { - return generalSorter ? new GeneralOutOfPlaceVectorSorter<>() : new VariableWidthOutOfPlaceVectorSorter(); + return generalSorter + ? 
new GeneralOutOfPlaceVectorSorter<>() + : new VariableWidthOutOfPlaceVectorSorter(); } - @Before public void prepare() { allocator = new RootAllocator(1024 * 1024); @@ -79,10 +76,10 @@ public void testSortString() { // sort the vector OutOfPlaceVectorSorter sorter = getSorter(); VectorValueComparator comparator = - DefaultVectorComparators.createDefaultComparator(vec); + DefaultVectorComparators.createDefaultComparator(vec); VarCharVector sortedVec = - (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); + (VarCharVector) vec.getField().getFieldType().createNewSingleVector("", allocator, null); sortedVec.allocateNew(vec.getByteCapacity(), vec.getValueCount()); sortedVec.setLastSet(vec.getValueCount() - 1); sortedVec.setValueCount(vec.getValueCount()); @@ -96,14 +93,23 @@ public void testSortString() { assertTrue(sortedVec.isNull(0)); assertTrue(sortedVec.isNull(1)); - assertEquals("12", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); - assertEquals("abc", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); - assertEquals("dictionary", new String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); - assertEquals("good", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); - assertEquals("hello", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); - assertEquals("world", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); - assertEquals("yes", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); + assertEquals( + "12", new String(Objects.requireNonNull(sortedVec.get(2)), StandardCharsets.UTF_8)); + assertEquals( + "abc", new String(Objects.requireNonNull(sortedVec.get(3)), StandardCharsets.UTF_8)); + assertEquals( + "dictionary", + new 
String(Objects.requireNonNull(sortedVec.get(4)), StandardCharsets.UTF_8)); + assertEquals( + "good", new String(Objects.requireNonNull(sortedVec.get(5)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(sortedVec.get(6)), StandardCharsets.UTF_8)); + assertEquals( + "hello", new String(Objects.requireNonNull(sortedVec.get(7)), StandardCharsets.UTF_8)); + assertEquals( + "world", new String(Objects.requireNonNull(sortedVec.get(8)), StandardCharsets.UTF_8)); + assertEquals( + "yes", new String(Objects.requireNonNull(sortedVec.get(9)), StandardCharsets.UTF_8)); sortedVec.close(); } diff --git a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java index 7951c39d550d2..5c37ddf9284e4 100644 --- a/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java +++ b/java/algorithm/src/test/java/org/apache/arrow/algorithm/sort/TestVariableWidthSorting.java @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.arrow.algorithm.sort; import static org.junit.jupiter.api.Assertions.assertArrayEquals; @@ -28,7 +27,6 @@ import java.util.Comparator; import java.util.List; import java.util.function.Function; - import org.apache.arrow.memory.BufferAllocator; import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.BaseVariableWidthVector; @@ -41,9 +39,7 @@ import org.junit.runner.RunWith; import org.junit.runners.Parameterized; -/** - * Test sorting variable width vectors with random data. - */ +/** Test sorting variable width vectors with random data. 
*/ @RunWith(Parameterized.class) public class TestVariableWidthSorting> { @@ -72,8 +68,11 @@ public void shutdown() { } public TestVariableWidthSorting( - int length, double nullFraction, String desc, - Function vectorGenerator, TestSortingUtil.DataGenerator dataGenerator) { + int length, + double nullFraction, + String desc, + Function vectorGenerator, + TestSortingUtil.DataGenerator dataGenerator) { this.length = length; this.nullFraction = nullFraction; this.vectorGenerator = vectorGenerator; @@ -92,9 +91,11 @@ void sortOutOfPlace() { // sort the vector VariableWidthOutOfPlaceVectorSorter sorter = new VariableWidthOutOfPlaceVectorSorter(); - VectorValueComparator comparator = DefaultVectorComparators.createDefaultComparator(vector); + VectorValueComparator comparator = + DefaultVectorComparators.createDefaultComparator(vector); - try (V sortedVec = (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { + try (V sortedVec = + (V) vector.getField().getFieldType().createNewSingleVector("", allocator, null)) { int dataSize = vector.getOffsetBuffer().getInt(vector.getValueCount() * 4L); sortedVec.allocateNew(dataSize, vector.getValueCount()); sortedVec.setValueCount(vector.getValueCount()); @@ -112,33 +113,36 @@ public static Collection getParameters() { List params = new ArrayList<>(); for (int length : VECTOR_LENGTHS) { for (double nullFrac : NULL_FRACTIONS) { - params.add(new Object[]{ - length, nullFrac, "VarCharVector", - (Function) allocator -> new VarCharVector("vector", allocator), - TestSortingUtil.STRING_GENERATOR - }); + params.add( + new Object[] { + length, + nullFrac, + "VarCharVector", + (Function) + allocator -> new VarCharVector("vector", allocator), + TestSortingUtil.STRING_GENERATOR + }); } } return params; } - /** - * Verify results as byte arrays. - */ + /** Verify results as byte arrays. 
*/ public static void verifyResults(V vector, String[] expected) { assertEquals(vector.getValueCount(), expected.length); for (int i = 0; i < expected.length; i++) { if (expected[i] == null) { assertTrue(vector.isNull(i)); } else { - assertArrayEquals(((Text) vector.getObject(i)).getBytes(), expected[i].getBytes(StandardCharsets.UTF_8)); + assertArrayEquals( + ((Text) vector.getObject(i)).getBytes(), expected[i].getBytes(StandardCharsets.UTF_8)); } } } /** - * String comparator with the same behavior as that of - * {@link DefaultVectorComparators.VariableWidthComparator}. + * String comparator with the same behavior as that of {@link + * DefaultVectorComparators.VariableWidthComparator}. */ static class StringComparator implements Comparator { diff --git a/java/spotless/asf-java.license b/java/spotless/asf-java.license new file mode 100644 index 0000000000000..3e7c6c26f5578 --- /dev/null +++ b/java/spotless/asf-java.license @@ -0,0 +1,16 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ diff --git a/java/spotless/asf-xml.license b/java/spotless/asf-xml.license new file mode 100644 index 0000000000000..a43b97bca8f0f --- /dev/null +++ b/java/spotless/asf-xml.license @@ -0,0 +1,11 @@ + + \ No newline at end of file