From 5958b8a792c9d49e9483f80ebf89fc5f0ba637de Mon Sep 17 00:00:00 2001 From: Todd Date: Wed, 4 Dec 2024 18:27:18 -0500 Subject: [PATCH] Implement ordering by frequency --- CHANGELOG.md | 1 + README.md | 75 +++++++---- .../gatherers4j/FrequencyGatherer.java | 86 ++++++++++++ .../com/ginsberg/gatherers4j/Gatherers4j.java | 124 +++++++++++++----- .../com/ginsberg/gatherers4j/WithCount.java | 25 ++++ .../gatherers4j/FrequencyGathererTest.java | 104 +++++++++++++++ 6 files changed, 354 insertions(+), 61 deletions(-) create mode 100644 src/main/java/com/ginsberg/gatherers4j/FrequencyGatherer.java create mode 100644 src/main/java/com/ginsberg/gatherers4j/WithCount.java create mode 100644 src/test/java/com/ginsberg/gatherers4j/FrequencyGathererTest.java diff --git a/CHANGELOG.md b/CHANGELOG.md index 960c8aa..b7be962 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ ### 0.7.0 + Use greedy integrators where possible (Fixes #57) + Add [JSpecify](https://jspecify.dev/) annotations for static analysis ++ Implement `orderByFrequencyAscending()` and `orderByFrequencyDescending()` ### 0.6.0 + Implement `dropLast(n)` diff --git a/README.md b/README.md index d2fd86f..77feea1 100644 --- a/README.md +++ b/README.md @@ -31,32 +31,34 @@ implementation("com.ginsberg:gatherers4j:0.7.0") ### Streams -| Function | Purpose | -|------------------------------|--------------------------------------------------------------------------------------------------------------------------------| -| `debounce(amount, duration)` | Limit stream elements to `amount` elements over `duration`, dropping any elements over the limit until a new `duration` starts | -| `dedupeConsecutive()` | Remove consecutive duplicates from a stream | -| `dedupeConsecutiveBy(fn)` | Remove consecutive duplicates from a stream as returned by `fn` | -| `distinctBy(fn)` | Emit only distinct elements from the stream, as measured by `fn` | -| `dropLast(n)` | Keep all but the last `n` elements of the stream | -| `exactSize(n)` | Ensure the stream is exactly `n` elements long, or throw an `IllegalStateException` | -| `filterWithIndex(predicate)` | Filter the stream with the given `predicate`, which takes an `element` and its `index` | -| `grouping()` | Group consecute identical elements into lists | -| `groupingBy(fn)` | Group consecutive elements that are identical according to `fn` into lists | -| `interleave(iterable)` | Creates a stream of alternating objects from the input stream and the argument iterable | -| `interleave(iterator)` | Creates a stream of alternating objects from the input stream and the argument iterator | -| `interleave(stream)` | Creates a stream of alternating objects from the input stream and the argument stream | -| `last(n)` | Constrain the stream to the last `n` values | -| `maxBy(fn)` | Return a stream containing a single element, which is the maximum value returned by the mapping function `fn` | -| `minBy(fn)` | Return a stream containing a single element, which is the minimum value returned by the mapping function `fn` | -| `reverse()` | Reverse the order of the stream | -| `shuffle()` | Shuffle the stream into a random order using the platform default `RandomGenerator` | -| `shuffle(rg)` | Shuffle the stream into a random order using the specified `RandomGenerator` | -| `throttle(amount, duration)` | Limit stream elements to `amount` elements over `duration`, pausing until a new `duration` period starts | -| `withIndex()` | Maps all elements of the stream as-is along with their 0-based index | -| `zipWith(iterable)` | Creates a stream of `Pair` objects whose values come from the input stream and argument iterable | -| `zipWith(iterator)` | Creates a stream of `Pair` objects whose values come from the input stream and argument iterator | -| `zipWith(stream)` | Creates a stream of `Pair` objects whose values come from the input stream and argument stream | -| `zipWithNext()` | Creates a stream of `List` objects via a sliding window of width 2 and stepping 1 | +| Function | Purpose | +|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------| +| `debounce(amount, duration)` | Limit stream elements to `amount` elements over `duration`, dropping any elements over the limit until a new `duration` starts | +| `dedupeConsecutive()` | Remove consecutive duplicates from a stream | +| `dedupeConsecutiveBy(fn)` | Remove consecutive duplicates from a stream as returned by `fn` | +| `distinctBy(fn)` | Emit only distinct elements from the stream, as measured by `fn` | +| `dropLast(n)` | Keep all but the last `n` elements of the stream | +| `exactSize(n)` | Ensure the stream is exactly `n` elements long, or throw an `IllegalStateException` | +| `filterWithIndex(predicate)` | Filter the stream with the given `predicate`, which takes an `element` and its `index` | +| `grouping()` | Group consecute identical elements into lists | +| `groupingBy(fn)` | Group consecutive elements that are identical according to `fn` into lists | +| `interleave(iterable)` | Creates a stream of alternating objects from the input stream and the argument iterable | +| `interleave(iterator)` | Creates a stream of alternating objects from the input stream and the argument iterator | +| `interleave(stream)` | Creates a stream of alternating objects from the input stream and the argument stream | +| `last(n)` | Constrain the stream to the last `n` values | +| `maxBy(fn)` | Return a stream containing a single element, which is the maximum value returned by the mapping function `fn` | +| `minBy(fn)` | Return a stream containing a single element, which is the minimum value returned by the mapping function `fn` | +| `orderByFrequencyAscending() | Returns a stream where elements are ordered from least to most frequent as `WithCount` wrapper objects. | +| `orderByFrequencyDescending() | Returns a stream where elements are ordered from most to least frequent as `WithCount` wrapper objects. | +| `reverse()` | Reverse the order of the stream | +| `shuffle()` | Shuffle the stream into a random order using the platform default `RandomGenerator` | +| `shuffle(rg)` | Shuffle the stream into a random order using the specified `RandomGenerator` | +| `throttle(amount, duration)` | Limit stream elements to `amount` elements over `duration`, pausing until a new `duration` period starts | +| `withIndex()` | Maps all elements of the stream as-is along with their 0-based index | +| `zipWith(iterable)` | Creates a stream of `Pair` objects whose values come from the input stream and argument iterable | +| `zipWith(iterator)` | Creates a stream of `Pair` objects whose values come from the input stream and argument iterator | +| `zipWith(stream)` | Creates a stream of `Pair` objects whose values come from the input stream and argument stream | +| `zipWithNext()` | Creates a stream of `List` objects via a sliding window of width 2 and stepping 1 | ### Mathematics/Statistics @@ -246,6 +248,27 @@ streamOfPeople // Person("Baby", 1) ``` + +#### Order elements by frequency, ascending + +```java +Stream.of("A", "A", "A", "B", "B" ,"C") + .gather(Gatherers4j.orderByFrequencyAscending()) + .toList() + +// [WithCount("C", 1), WithCount("B", 2), WithCount("C", 3) ] +``` + +#### Order elements by frequency, descending + +```java +Stream.of("A", "A", "A", "B", "B" ,"C") + .gather(Gatherers4j.orderByFrequencyDescending()) + .toList() + +// [WithCount("C", 3), WithCount("B", 2), WithCount("A", 1) ] +``` + #### Reverse the order of the stream ```java diff --git a/src/main/java/com/ginsberg/gatherers4j/FrequencyGatherer.java b/src/main/java/com/ginsberg/gatherers4j/FrequencyGatherer.java new file mode 100644 index 0000000..0c0f36c --- /dev/null +++ b/src/main/java/com/ginsberg/gatherers4j/FrequencyGatherer.java @@ -0,0 +1,86 @@ +/* + * Copyright 2024 Todd Ginsberg + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.ginsberg.gatherers4j; + +import org.jspecify.annotations.Nullable; + +import java.util.Comparator; +import java.util.HashMap; +import java.util.Map; +import java.util.function.BiConsumer; +import java.util.function.BinaryOperator; +import java.util.function.Supplier; +import java.util.stream.Gatherer; + +import static com.ginsberg.gatherers4j.GathererUtils.mustNotBeNull; + +public class FrequencyGatherer + implements Gatherer, WithCount> { + + public enum Order { + Ascending, + Descending + } + private final Order order; + + FrequencyGatherer(final Order order) { + mustNotBeNull(order, "Order must be specified"); + this.order = order; + } + + @Override + public Supplier> initializer() { + return State::new; + } + + @Override + public Integrator, INPUT, WithCount> integrator() { + return Integrator.ofGreedy((state, element, downstream) -> { + state.counts.compute(element, (_, count) -> count == null ? 1 : count + 1); + return !downstream.isRejecting(); + }); + } + + @Override + public BinaryOperator> combiner() { + return (state1, state2) -> { + state2.counts.forEach((key, value) -> state1.counts.merge(key, value, Long::sum)); + return state1; + }; + } + + @Override + public BiConsumer, Downstream>> finisher() { + return (inputState, downstream) -> inputState.counts + .entrySet() + .stream().map(it -> new WithCount<>(it.getKey(), it.getValue())) + .sorted(comparator()) + .forEach(downstream::push); + } + + private Comparator> comparator() { + if(order == Order.Descending) { + return (o1, o2) -> (int)(o2.count() - o1.count()); + } else { + return (o1, o2) -> (int)(o1.count() - o2.count()); + } + } + + public static class State { + final Map counts = new HashMap<>(); + } +} diff --git a/src/main/java/com/ginsberg/gatherers4j/Gatherers4j.java b/src/main/java/com/ginsberg/gatherers4j/Gatherers4j.java index cee2c10..b8c869b 100644 --- a/src/main/java/com/ginsberg/gatherers4j/Gatherers4j.java +++ b/src/main/java/com/ginsberg/gatherers4j/Gatherers4j.java @@ -64,7 +64,7 @@ public static DedupeConsecutiveGatherer dedupeConsecutiveBy(final /// Filter a stream such that it only contains distinct elements measured by the given `function`. /// /// @param mappingFunction A non-null mapping function, the results of which will be used to check for distinct elements - /// @param Type of elements in both the input and output streams + /// @param Type of elements in both the input and output streams /// @return A non-null `DistinctGatherer` public static DistinctGatherer distinctBy(final Function mappingFunction) { return new DistinctGatherer<>(mappingFunction); @@ -72,7 +72,7 @@ public static DedupeConsecutiveGatherer dedupeConsecutiveBy(final /// Keep all elements except the last `count` elements of the stream. /// - /// @param count A positive number of elements to drop from the end of the stream + /// @param count A positive number of elements to drop from the end of the stream /// @param Type of elements in both the input and output streams /// @return A non-null `DropLastGatherer` public static DropLastGatherer dropLast(final int count) { @@ -82,10 +82,10 @@ public static DropLastGatherer dropLast(final int count) { /// Ensure the input stream is exactly `size` elements long, and emit all elements if so. /// If not, throw an `IllegalStateException`. /// - /// @param size Exact number of elements the stream must have + /// @param size Exact number of elements the stream must have /// @param Type of elements in both the input and output streams - /// @throws IllegalStateException when the input stream is not exactly `size` elements long /// @return A non-null `SizeGatherer` + /// @throws IllegalStateException when the input stream is not exactly `size` elements long public static SizeGatherer exactSize(final long size) { return new SizeGatherer<>(size); } @@ -94,8 +94,8 @@ public static SizeGatherer exactSize(final long size) { /// and its index. /// /// @param predicate A non-null `BiPredicate` where the `Long` is the zero-based index of the element - /// being filtered, and the `INPUT` is the element itself. - /// @param Type of elements in the input stream + /// being filtered, and the `INPUT` is the element itself. + /// @param Type of elements in the input stream /// @return A non-null `FilteringWithIndexGatherer` public static FilteringWithIndexGatherer filterWithIndex( final BiPredicate predicate @@ -116,7 +116,7 @@ public static GroupingByGatherer grouping() { /// equal elements, where equality is measured by the given `mappingFunction`, are in the same `List`. /// /// @param mappingFunction A non-null function, the results of which are used to measure equality of consecutive elements. - /// @param Type of elements in the input stream + /// @param Type of elements in the input stream /// @return A non-null `GroupingByGatherer` public static GroupingByGatherer groupingBy(final Function mappingFunction) { return new GroupingByGatherer<>(mappingFunction); @@ -124,7 +124,7 @@ public static GroupingByGatherer groupingBy(final Function Type of elements in both the input stream and argument iterable /// @return A non-null `InterleavingGatherer` public static InterleavingGatherer interleave(final Iterable other) { @@ -133,7 +133,7 @@ public static InterleavingGatherer interleave(final Iterable Type of elements in both the input stream and argument iterator /// @return A non-null `InterleavingGatherer` public static InterleavingGatherer interleave(final Iterator other) { @@ -142,7 +142,7 @@ public static InterleavingGatherer interleave(final Iterator Type of elements in both the input and argument streams /// @return A non-null `InterleavingGatherer` public static InterleavingGatherer interleave(final Stream other) { @@ -151,7 +151,7 @@ public static InterleavingGatherer interleave(final Stream /// Remove all but the last `count` elements from the stream. /// - /// @param count A non-negative integer, the number of elements to return + /// @param count A non-negative integer, the number of elements to return /// @param Type of elements in the input stream /// @return A non-null `LastGatherer` public static LastGatherer last(final int count) { @@ -164,8 +164,8 @@ public static LastGatherer last(final int count) { /// evaluate null values or null mappings. /// /// @param mappingFunction A mapping function, the results of which must implement `Comparable` - /// @param Type of elements in the input stream - /// @param Type of object returned from the `mappingFunction`, which much implement `Comparable` + /// @param Type of elements in the input stream + /// @param Type of object returned from the `mappingFunction`, which much implement `Comparable` /// @return A non-null `MinMaxGatherer` public static > MinMaxGatherer maxBy( final Function mappingFunction @@ -179,8 +179,8 @@ public static > MinMaxGatherer Type of elements in the input stream - /// @param Type of object returned from the `mappingFunction`, which much implement `Comparable` + /// @param Type of elements in the input stream + /// @param Type of object returned from the `mappingFunction`, which much implement `Comparable` /// @return A non-null `MinMaxGatherer` public static > MinMaxGatherer minBy( final Function mappingFunction @@ -188,10 +188,58 @@ public static > MinMaxGatherer(false, mappingFunction); } + + /// Emit elements in the input stream ordered by frequency from least frequently occurring + /// to most frequently occurring. Elements are emitted wrapped in `WithCount` objects + /// that carry the element and the number of occurrences. + /// + /// Example: + /// ``` + /// Stream.of("A", "A", "A", "B", "B", "C") + /// .gather(orderByFrequencyAscending()) + /// .toList(); + /// + /// // Produces: + /// [WithCount("C", 1), WithCount("B", 2), WithCount("A", 4)] + /// ``` + /// + /// Note: This consumes the entire stream and holds it in memory, so it will not work on infinite + /// streams and may cause memory pressure on very large streams. + /// + /// @param Type of elements in the input stream + /// @return A non-null `FrequencyGatherer` + public static FrequencyGatherer orderByFrequencyAscending() { + return new FrequencyGatherer<>(FrequencyGatherer.Order.Ascending); + } + + /// Emit elements in the input stream ordered by frequency from most frequently occurring + /// to least frequently occurring. Elements are emitted wrapped in `WithCount` objects + /// that carry the element and the number of occurrences. + /// + /// Example: + /// ``` + /// Stream.of("A", "A", "A", "B", "B", "C") + /// .gather(orderByFrequencyDescending()) + /// .toList(); + /// + /// // Produces: + /// [WithCount("A", 4), WithCount("B", 2), WithCount("C", 1)] + /// ``` + /// + /// Note: This consumes the entire stream and holds it in memory, so it will not work on infinite + /// streams and may cause memory pressure on very large streams. + /// + /// @param Type of elements in the input stream + /// @return A non-null `FrequencyGatherer` + public static FrequencyGatherer orderByFrequencyDescending() { + return new FrequencyGatherer<>(FrequencyGatherer.Order.Descending); + } + /// Reverse the order of the input Stream. /// /// Note: This consumes the entire stream and holds it in memory, so it will not work on infinite /// streams and may cause memory pressure on very large streams. + /// /// @param Type of elements in the input stream /// @return A non-null `ReversingGatherer` public static ReversingGatherer reverse() { @@ -200,6 +248,7 @@ public static ReversingGatherer reverse() { /// Create a `Stream` that represents the running population standard /// deviation of a `Stream`. + /// /// @return A non-null `BigDecimalStandardDeviationGatherer` public static BigDecimalStandardDeviationGatherer runningPopulationStandardDeviation() { return new BigDecimalStandardDeviationGatherer<>( @@ -209,11 +258,11 @@ public static BigDecimalStandardDeviationGatherer runningPopulationS } /// Create a `Stream` that represents the running population standard deviation of a `BigDecimal` - /// objects mappedfrom a `Stream` via a `mappingFunction`. + /// objects mapped from a `Stream` via a `mappingFunction`. /// /// @param mappingFunction A function to map `` objects to `BigDecimal`, the results of which will be used - /// in the standard deviation calculation - /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` + /// in the standard deviation calculation + /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` /// @return A non-null `BigDecimalStandardDeviationGatherer` public static BigDecimalStandardDeviationGatherer runningPopulationStandardDeviationBy( final Function mappingFunction @@ -225,6 +274,7 @@ public static BigDecimalStandardDeviationGatherer runningPopulati } /// Create a `Stream` that represents the running product of a `Stream`. + /// /// @return A non-null `BigDecimalProductGatherer` public static BigDecimalProductGatherer runningProduct() { return new BigDecimalProductGatherer<>(Function.identity()); @@ -234,8 +284,8 @@ public static BigDecimalProductGatherer runningProduct() { /// from a `Stream` via a `mappingFunction`. /// /// @param mappingFunction A function to map `` objects to `BigDecimal`, the results of which will be used - /// in the product calculation - /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` + /// in the product calculation + /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` /// @return A non-null `BigDecimalProductGatherer` public static BigDecimalProductGatherer runningProductBy( final Function mappingFunction @@ -244,6 +294,7 @@ public static BigDecimalProductGatherer runningProductBy( } /// Create a `Stream` that represents the running sample standard deviation of a `Stream`. + /// /// @return A non-null `BigDecimalStandardDeviationGatherer` public static BigDecimalStandardDeviationGatherer runningSampleStandardDeviation() { return new BigDecimalStandardDeviationGatherer<>( @@ -256,8 +307,8 @@ public static BigDecimalStandardDeviationGatherer runningSampleStand /// from a `Stream` via a `mappingFunction`. /// /// @param mappingFunction A function to map `` objects to `BigDecimal`, the results of which will be used - /// in the standard deviation calculation - /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` + /// in the standard deviation calculation + /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` /// @return A non-null `BigDecimalStandardDeviationGatherer` public static BigDecimalStandardDeviationGatherer runningSampleStandardDeviationBy( final Function mappingFunction @@ -269,6 +320,7 @@ public static BigDecimalStandardDeviationGatherer runningSampleSt } /// Create a `Stream` that represents the running sum of a `Stream`. + /// /// @return A non-null `BigDecimalSumGatherer` public static BigDecimalSumGatherer runningSum() { return new BigDecimalSumGatherer<>(Function.identity()); @@ -278,8 +330,8 @@ public static BigDecimalSumGatherer runningSum() { /// from a `Stream` via a `mappingFunction`. /// /// @param mappingFunction A function to map `` objects to `BigDecimal`, the results of which will be used - /// in the running sum calculation - /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` + /// in the running sum calculation + /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` /// @return A non-null `BigDecimalSumGatherer` public static BigDecimalSumGatherer runningSumBy( final Function mappingFunction @@ -311,7 +363,7 @@ public static ShufflingGatherer shuffle() { /// streams and may cause memory pressure on very large streams. /// /// @param randomGenerator A non-null `RandomGenerator` to use as a random source for the shuffle - /// @param Type of elements in the input stream + /// @param Type of elements in the input stream /// @return A non-null `ShufflingGatherer` public static ShufflingGatherer shuffle(final RandomGenerator randomGenerator) { return new ShufflingGatherer<>(randomGenerator); @@ -321,8 +373,8 @@ public static ShufflingGatherer shuffle(final RandomGenerator ran /// the given function. This is useful when paired with the `withOriginal` function. /// /// @param mappingFunction A function to map `` objects to `BigDecimal`, the results of which will be used - /// in the running average calculation - /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` + /// in the running average calculation + /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` /// @return A non-null `BigDecimalSimpleAverageGatherer` public static BigDecimalSimpleAverageGatherer simpleRunningAverageBy( final Function mappingFunction @@ -343,7 +395,7 @@ public static BigDecimalSimpleMovingAverageGatherer simpleMovingAver /// via a `mappingFunction` and looking back `windowSize` number of elements. /// /// @param mappingFunction A function to map `` objects to `BigDecimal`, the results of which will be used - /// in the moving average calculation + /// in the moving average calculation /// @param windowSize The number of elements to average, must be greater than 1. /// @param Type of elements in the input stream, to be remapped to `BigDecimal` by the `mappingFunction` /// @return A non-null `BigDecimalSimpleMovingAverageGatherer` @@ -359,13 +411,14 @@ public static BigDecimalSimpleMovingAverageGatherer simpleMovingA /// /// @param amount A positive number of elements to allow per period /// @param duration A positive duration for the length of the period - /// @param Type of elements in the input stream + /// @param Type of elements in the input stream /// @return A non-null `ThrottlingGatherer` public static ThrottlingGatherer throttle(final int amount, final Duration duration) { return new ThrottlingGatherer<>(ThrottlingGatherer.LimitRule.Pause, amount, duration); } /// Maps all elements of the stream as-is along with their 0-based index. + /// /// @param Type of elements in the input stream /// @return A non-null `IndexingGatherer` public static IndexingGatherer withIndex() { @@ -375,8 +428,8 @@ public static IndexingGatherer withIndex() { /// Creates a stream of `Pair` objects whose values come from the stream this is called on /// and the argument collection /// - /// @param other A non-null iterable to zip with - /// @param Type of object in the source stream + /// @param other A non-null iterable to zip with + /// @param Type of object in the source stream /// @param Type of object in the argument `Iterable` /// @return A non-null `ZipWithGatherer` public static ZipWithGatherer zipWith(final Iterable other) { @@ -386,8 +439,8 @@ public static ZipWithGatherer zipWith(final Itera /// Creates a stream of `Pair` objects whose values come from the stream this is called on /// and the argument iterator /// - /// @param other A non-null iterator to zip with - /// @param Type of object in the source stream + /// @param other A non-null iterator to zip with + /// @param Type of object in the source stream /// @param Type of object in the argument `Iterator` /// @return A non-null `ZipWithGatherer` public static ZipWithGatherer zipWith(final Iterator other) { @@ -397,8 +450,8 @@ public static ZipWithGatherer zipWith(final Itera /// Creates a stream of `Pair` objects whose values come from the stream this is called on /// and the argument stream /// - /// @param other A non-null stream to zip with - /// @param Type of object in the source stream + /// @param other A non-null stream to zip with + /// @param Type of object in the source stream /// @param Type of object in the argument `Stream` /// @return A non-null `ZipWithGatherer` public static ZipWithGatherer zipWith(final Stream other) { @@ -406,6 +459,7 @@ public static ZipWithGatherer zipWith(final Strea } /// Creates a stream of `List` objects which contain each two adjacent elements in the input stream. + /// /// @param Type of elements in the input stream /// @return A non-null `ZipWithNextGatherer` public static ZipWithNextGatherer zipWithNext() { diff --git a/src/main/java/com/ginsberg/gatherers4j/WithCount.java b/src/main/java/com/ginsberg/gatherers4j/WithCount.java new file mode 100644 index 0000000..6d04747 --- /dev/null +++ b/src/main/java/com/ginsberg/gatherers4j/WithCount.java @@ -0,0 +1,25 @@ +/* + * Copyright 2024 Todd Ginsberg + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.ginsberg.gatherers4j; + +import org.jspecify.annotations.Nullable; + +public record WithCount( + @Nullable VALUE value, + long count +) { +} \ No newline at end of file diff --git a/src/test/java/com/ginsberg/gatherers4j/FrequencyGathererTest.java b/src/test/java/com/ginsberg/gatherers4j/FrequencyGathererTest.java new file mode 100644 index 0000000..f91765c --- /dev/null +++ b/src/test/java/com/ginsberg/gatherers4j/FrequencyGathererTest.java @@ -0,0 +1,104 @@ +/* + * Copyright 2024 Todd Ginsberg + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.ginsberg.gatherers4j; + +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.stream.Stream; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +class FrequencyGathererTest { + + @Test + void ascending() { + // Arrange + final Stream input = Stream.of("A", "A", "A", "B", "B", "B", "B", "C", "C"); + + // Act + final List> output = input.gather(Gatherers4j.orderByFrequencyAscending()).toList(); + + // Assert + assertThat(output) + .containsExactly( + new WithCount<>("C", 2), + new WithCount<>("A", 3), + new WithCount<>("B", 4) + ); + } + + @Test + void ascendingParallel() { + // Arrange + final Stream input = Stream.of("A", "A", "A", "B", "B", "B", "B", "C", "C"); + + // Act + final List> output = input.parallel().gather(Gatherers4j.orderByFrequencyAscending()).toList(); + + // Assert + assertThat(output) + .containsExactly( + new WithCount<>("C", 2), + new WithCount<>("A", 3), + new WithCount<>("B", 4) + ); + } + + @Test + void descending() { + // Arrange + final Stream input = Stream.of("A", "A", "A", "B", "B", "B", "B", "C", "C"); + + // Act + final List> output = input.gather(Gatherers4j.orderByFrequencyDescending()).toList(); + + // Assert + assertThat(output) + .containsExactly( + new WithCount<>("B", 4), + new WithCount<>("A", 3), + new WithCount<>("C", 2) + ); + } + + @Test + void descendingParallel() { + // Arrange + final Stream input = Stream.of("A", "A", "A", "B", "B", "B", "B", "C", "C"); + + // Act + final List> output = input.parallel().gather(Gatherers4j.orderByFrequencyDescending()).toList(); + + // Assert + assertThat(output) + .containsExactly( + new WithCount<>("B", 4), + new WithCount<>("A", 3), + new WithCount<>("C", 2) + ); + } + + @Test + void orderMustBeSpecified() { + assertThatThrownBy(() -> + Stream.of("A").gather(new FrequencyGatherer<>(null)).toList() + ).isInstanceOf(IllegalArgumentException.class); + } + +} \ No newline at end of file