Skip to content

Commit

Permalink
Implement ordering by frequency
Browse files Browse the repository at this point in the history
  • Loading branch information
tginsberg committed Dec 4, 2024
1 parent 9c93de4 commit 5958b8a
Show file tree
Hide file tree
Showing 6 changed files with 354 additions and 61 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
### 0.7.0
+ Use greedy integrators where possible (Fixes #57)
+ Add [JSpecify](https://jspecify.dev/) annotations for static analysis
+ Implement `orderByFrequencyAscending()` and `orderByFrequencyDescending()`

### 0.6.0
+ Implement `dropLast(n)`
Expand Down
75 changes: 49 additions & 26 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,32 +31,34 @@ implementation("com.ginsberg:gatherers4j:0.7.0")

### Streams

| Function | Purpose |
|------------------------------|--------------------------------------------------------------------------------------------------------------------------------|
| `debounce(amount, duration)` | Limit stream elements to `amount` elements over `duration`, dropping any elements over the limit until a new `duration` starts |
| `dedupeConsecutive()` | Remove consecutive duplicates from a stream |
| `dedupeConsecutiveBy(fn)` | Remove consecutive duplicates from a stream as returned by `fn` |
| `distinctBy(fn)` | Emit only distinct elements from the stream, as measured by `fn` |
| `dropLast(n)` | Keep all but the last `n` elements of the stream |
| `exactSize(n)` | Ensure the stream is exactly `n` elements long, or throw an `IllegalStateException` |
| `filterWithIndex(predicate)` | Filter the stream with the given `predicate`, which takes an `element` and its `index` |
| `grouping()` | Group consecute identical elements into lists |
| `groupingBy(fn)` | Group consecutive elements that are identical according to `fn` into lists |
| `interleave(iterable)` | Creates a stream of alternating objects from the input stream and the argument iterable |
| `interleave(iterator)` | Creates a stream of alternating objects from the input stream and the argument iterator |
| `interleave(stream)` | Creates a stream of alternating objects from the input stream and the argument stream |
| `last(n)` | Constrain the stream to the last `n` values |
| `maxBy(fn)` | Return a stream containing a single element, which is the maximum value returned by the mapping function `fn` |
| `minBy(fn)` | Return a stream containing a single element, which is the minimum value returned by the mapping function `fn` |
| `reverse()` | Reverse the order of the stream |
| `shuffle()` | Shuffle the stream into a random order using the platform default `RandomGenerator` |
| `shuffle(rg)` | Shuffle the stream into a random order using the specified `RandomGenerator` |
| `throttle(amount, duration)` | Limit stream elements to `amount` elements over `duration`, pausing until a new `duration` period starts |
| `withIndex()` | Maps all elements of the stream as-is along with their 0-based index |
| `zipWith(iterable)` | Creates a stream of `Pair` objects whose values come from the input stream and argument iterable |
| `zipWith(iterator)` | Creates a stream of `Pair` objects whose values come from the input stream and argument iterator |
| `zipWith(stream)` | Creates a stream of `Pair` objects whose values come from the input stream and argument stream |
| `zipWithNext()` | Creates a stream of `List` objects via a sliding window of width 2 and stepping 1 |
| Function | Purpose |
|-------------------------------|--------------------------------------------------------------------------------------------------------------------------------|
| `debounce(amount, duration)` | Limit stream elements to `amount` elements over `duration`, dropping any elements over the limit until a new `duration` starts |
| `dedupeConsecutive()` | Remove consecutive duplicates from a stream |
| `dedupeConsecutiveBy(fn)` | Remove consecutive duplicates from a stream as returned by `fn` |
| `distinctBy(fn)` | Emit only distinct elements from the stream, as measured by `fn` |
| `dropLast(n)` | Keep all but the last `n` elements of the stream |
| `exactSize(n)` | Ensure the stream is exactly `n` elements long, or throw an `IllegalStateException` |
| `filterWithIndex(predicate)` | Filter the stream with the given `predicate`, which takes an `element` and its `index` |
| `grouping()` | Group consecute identical elements into lists |
| `groupingBy(fn)` | Group consecutive elements that are identical according to `fn` into lists |
| `interleave(iterable)` | Creates a stream of alternating objects from the input stream and the argument iterable |
| `interleave(iterator)` | Creates a stream of alternating objects from the input stream and the argument iterator |
| `interleave(stream)` | Creates a stream of alternating objects from the input stream and the argument stream |
| `last(n)` | Constrain the stream to the last `n` values |
| `maxBy(fn)` | Return a stream containing a single element, which is the maximum value returned by the mapping function `fn` |
| `minBy(fn)` | Return a stream containing a single element, which is the minimum value returned by the mapping function `fn` |
| `orderByFrequencyAscending() | Returns a stream where elements are ordered from least to most frequent as `WithCount<T>` wrapper objects. |
| `orderByFrequencyDescending() | Returns a stream where elements are ordered from most to least frequent as `WithCount<T>` wrapper objects. |
| `reverse()` | Reverse the order of the stream |
| `shuffle()` | Shuffle the stream into a random order using the platform default `RandomGenerator` |
| `shuffle(rg)` | Shuffle the stream into a random order using the specified `RandomGenerator` |
| `throttle(amount, duration)` | Limit stream elements to `amount` elements over `duration`, pausing until a new `duration` period starts |
| `withIndex()` | Maps all elements of the stream as-is along with their 0-based index |
| `zipWith(iterable)` | Creates a stream of `Pair` objects whose values come from the input stream and argument iterable |
| `zipWith(iterator)` | Creates a stream of `Pair` objects whose values come from the input stream and argument iterator |
| `zipWith(stream)` | Creates a stream of `Pair` objects whose values come from the input stream and argument stream |
| `zipWithNext()` | Creates a stream of `List` objects via a sliding window of width 2 and stepping 1 |

### Mathematics/Statistics

Expand Down Expand Up @@ -246,6 +248,27 @@ streamOfPeople
// Person("Baby", 1)
```


#### Order elements by frequency, ascending

```java
Stream.of("A", "A", "A", "B", "B" ,"C")
.gather(Gatherers4j.orderByFrequencyAscending())
.toList()

// [WithCount("C", 1), WithCount("B", 2), WithCount("C", 3) ]
```

#### Order elements by frequency, descending

```java
Stream.of("A", "A", "A", "B", "B" ,"C")
.gather(Gatherers4j.orderByFrequencyDescending())
.toList()

// [WithCount("C", 3), WithCount("B", 2), WithCount("A", 1) ]
```

#### Reverse the order of the stream

```java
Expand Down
86 changes: 86 additions & 0 deletions src/main/java/com/ginsberg/gatherers4j/FrequencyGatherer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright 2024 Todd Ginsberg
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.ginsberg.gatherers4j;

import org.jspecify.annotations.Nullable;

import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.BinaryOperator;
import java.util.function.Supplier;
import java.util.stream.Gatherer;

import static com.ginsberg.gatherers4j.GathererUtils.mustNotBeNull;

public class FrequencyGatherer<INPUT extends @Nullable Object>
implements Gatherer<INPUT, FrequencyGatherer.State<INPUT>, WithCount<INPUT>> {

public enum Order {
Ascending,
Descending
}
private final Order order;

FrequencyGatherer(final Order order) {
mustNotBeNull(order, "Order must be specified");
this.order = order;
}

@Override
public Supplier<State<INPUT>> initializer() {
return State::new;
}

@Override
public Integrator<State<INPUT>, INPUT, WithCount<INPUT>> integrator() {
return Integrator.ofGreedy((state, element, downstream) -> {
state.counts.compute(element, (_, count) -> count == null ? 1 : count + 1);
return !downstream.isRejecting();
});
}

@Override
public BinaryOperator<State<INPUT>> combiner() {
return (state1, state2) -> {
state2.counts.forEach((key, value) -> state1.counts.merge(key, value, Long::sum));
return state1;
};
}

@Override
public BiConsumer<State<INPUT>, Downstream<? super WithCount<INPUT>>> finisher() {
return (inputState, downstream) -> inputState.counts
.entrySet()
.stream().map(it -> new WithCount<>(it.getKey(), it.getValue()))
.sorted(comparator())
.forEach(downstream::push);
}

private Comparator<WithCount<INPUT>> comparator() {
if(order == Order.Descending) {
return (o1, o2) -> (int)(o2.count() - o1.count());
} else {
return (o1, o2) -> (int)(o1.count() - o2.count());
}
}

public static class State<INPUT> {
final Map<INPUT, Long> counts = new HashMap<>();
}
}
Loading

0 comments on commit 5958b8a

Please sign in to comment.