Skip to content

Commit

Permalink
Introduce dedicated FilterByIndexGatherer (#48)
Browse files Browse the repository at this point in the history
Introduce dedicated `MoreGatherers.byIndex()` Gatherer as more performant alternative to `zipWithIndex().filter().map()`
  • Loading branch information
pivovarit authored Oct 16, 2024
1 parent 0c66f84 commit 6f822bf
Show file tree
Hide file tree
Showing 7 changed files with 259 additions and 3 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ Provided `Gatherers`:
- `MoreGatherers.distinctUntilChanged(Function<T, R>)`
- takes elements until a change is detected based on a key extractor function
- `MoreGatherers.windowSliding(int, int)`
- creates a sliding window of a fixed size with a fixed step, extends `Gatherers.windowSliding(int)` by adding a step parameter
- creates a sliding window of a fixed size with a fixed step, extends `Gatherers.windowSliding(int)` by adding a step parameter
- `MoreGatherers.byIndex(BiPredicate<Long, T>)`
- filters elements based on their index and value

### Philosophy

Expand Down
29 changes: 29 additions & 0 deletions src/main/java/com/pivovarit/gatherers/FilterByIndexGatherer.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package com.pivovarit.gatherers;

import java.util.Objects;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiPredicate;
import java.util.function.Supplier;
import java.util.stream.Gatherer;

record FilterByIndexGatherer<T>(BiPredicate<Long, ? super T> predicate) implements Gatherer<T, AtomicLong, T> {

FilterByIndexGatherer {
Objects.requireNonNull(predicate);
}

@Override
public Supplier<AtomicLong> initializer() {
return AtomicLong::new;
}

@Override
public Integrator<AtomicLong, T, T> integrator() {
return Integrator.ofGreedy((seq, t, downstream) -> {
if (predicate.test(seq.getAndIncrement(), t)) {
downstream.push(t);
}
return true;
});
}
}
24 changes: 22 additions & 2 deletions src/main/java/com/pivovarit/gatherers/MoreGatherers.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package com.pivovarit.gatherers;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.stream.Gatherer;
import java.util.stream.Stream;
Expand Down Expand Up @@ -240,7 +240,27 @@ private MoreGatherers() {
* @throws IllegalArgumentException if {@code windowSize} is less than one or {@code step} is less than zero, or greater than {@code windowSize}
* @apiNote this {@link Gatherer} extends {@link java.util.stream.Gatherers#windowSliding(int)} by allowing to customize the step
*/
public static <TR> Gatherer<TR, ?, List<TR>> windowSliding(int windowSize, int step) {
public static <T> Gatherer<T, ?, List<T>> windowSliding(int windowSize, int step) {
return new WindowSlidingGatherer<>(windowSize, step);
}

/**
* Creates a {@link Gatherer} that filters elements based on their index and value using a given {@link BiPredicate}.
* The provided {@code BiPredicate} is applied to each element of the source, along with its corresponding index
* (starting from 0). Only the elements that satisfy the predicate (i.e., for which the predicate returns {@code true})
* are retained.
*
* @param <T> the type of elements to be filtered
* @param predicate a {@link BiPredicate} that takes the index and element as input, and returns {@code true} to retain
* the element, or {@code false} to exclude it
*
* @return a {@link Gatherer} that applies the given filter based on element index and value
*
* @apiNote The same result can be achieved by using {@code zipWithIndex()}, {@code filter()}, and {@code map()}.
* However, this method is significantly faster because it avoids the intermediate steps and directly filters
* elements based on their index.
*/
public static <T> Gatherer<T, ?, T> byIndex(BiPredicate<Long, ? super T> predicate) {
return new FilterByIndexGatherer<>(predicate);
}
}
27 changes: 27 additions & 0 deletions src/test/java/com/pivovarit/gatherers/Benchmarks.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.pivovarit.gatherers;

import org.openjdk.jmh.results.format.ResultFormatType;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.OptionsBuilder;

import java.nio.file.Path;

final class Benchmarks {

private Benchmarks() {
}

private static final Path BENCHMARKS_PATH = Path.of("src/test/resources/benchmarks/");

static void run(Class<?> clazz) throws RunnerException {
new Runner(new OptionsBuilder()
.include(clazz.getSimpleName())
.warmupIterations(3)
.measurementIterations(5)
.resultFormat(ResultFormatType.JSON)
.result(Benchmarks.BENCHMARKS_PATH.resolve("%s.json".formatted(clazz.getSimpleName())).toString())
.forks(1)
.build()).run();
}
}
33 changes: 33 additions & 0 deletions src/test/java/com/pivovarit/gatherers/FilterByIndexBenchmark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
package com.pivovarit.gatherers;

import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.runner.RunnerException;

import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

public class FilterByIndexBenchmark {

private static List<Integer> source = Stream.iterate(0, i -> i + 1).limit(10_000_000).toList();

@Benchmark
public List<Integer> filterByIndex() {
return source.stream()
.gather(MoreGatherers.byIndex((i, _) -> i % 2 == 0))
.toList();
}

@Benchmark
public List<Integer> zipWithIndexThenFilter() {
return source.stream()
.gather(MoreGatherers.zipWithIndex())
.filter(t -> t.getValue() % 2 == 0)
.map(Map.Entry::getKey)
.toList();
}

public static void main(String[] ignored) throws RunnerException {
Benchmarks.run(FilterByIndexBenchmark.class);
}
}
29 changes: 29 additions & 0 deletions src/test/java/com/pivovarit/gatherers/blackbox/ByIndexTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package com.pivovarit.gatherers.blackbox;

import org.junit.jupiter.api.Test;

import java.util.stream.Stream;

import static com.pivovarit.gatherers.MoreGatherers.byIndex;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

class ByIndexTest {

@Test
void shouldRejectNullPredicate() {
assertThatThrownBy(() -> byIndex(null)).isInstanceOf(NullPointerException.class);
}

@Test
void shouldFilterByIndexEmptyStream() {
assertThat(Stream.empty().gather(byIndex((_, _) -> true))).isEmpty();
}

@Test
void shouldFilterByIndex() {
assertThat(Stream.of("a", "bb", "cc", "ddd")
.gather(byIndex((i, _) -> i % 2 == 0)))
.containsExactly("a", "cc");
}
}
116 changes: 116 additions & 0 deletions src/test/resources/benchmarks/FilterByIndexBenchmark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
[
{
"jmhVersion" : "1.37",
"benchmark" : "com.pivovarit.gatherers.FilterByIndexBenchmark.filterByIndex",
"mode" : "thrpt",
"threads" : 1,
"forks" : 1,
"jvm" : "/Users/pivovarit/Library/Java/JavaVirtualMachines/openjdk-23/Contents/Home/bin/java",
"jvmArgs" : [
"--enable-preview",
"-javaagent:/Users/pivovarit/Applications/IntelliJ IDEA Ultimate.app/Contents/lib/idea_rt.jar=57563:/Users/pivovarit/Applications/IntelliJ IDEA Ultimate.app/Contents/bin",
"-Dfile.encoding=UTF-8",
"-Dsun.stdout.encoding=UTF-8",
"-Dsun.stderr.encoding=UTF-8"
],
"jdkVersion" : "23",
"vmName" : "OpenJDK 64-Bit Server VM",
"vmVersion" : "23+37-2369",
"warmupIterations" : 3,
"warmupTime" : "10 s",
"warmupBatchSize" : 1,
"measurementIterations" : 5,
"measurementTime" : "10 s",
"measurementBatchSize" : 1,
"primaryMetric" : {
"score" : 24.892622992010878,
"scoreError" : 2.171902468249229,
"scoreConfidence" : [
22.72072052376165,
27.064525460260107
],
"scorePercentiles" : {
"0.0" : 24.551089114037865,
"50.0" : 24.67685263461223,
"90.0" : 25.89673224823203,
"95.0" : 25.89673224823203,
"99.0" : 25.89673224823203,
"99.9" : 25.89673224823203,
"99.99" : 25.89673224823203,
"99.999" : 25.89673224823203,
"99.9999" : 25.89673224823203,
"100.0" : 25.89673224823203
},
"scoreUnit" : "ops/s",
"rawData" : [
[
25.89673224823203,
24.551089114037865,
24.64380175941238,
24.67685263461223,
24.694639203759873
]
]
},
"secondaryMetrics" : {
}
},
{
"jmhVersion" : "1.37",
"benchmark" : "com.pivovarit.gatherers.FilterByIndexBenchmark.zipWithIndexThenFilter",
"mode" : "thrpt",
"threads" : 1,
"forks" : 1,
"jvm" : "/Users/pivovarit/Library/Java/JavaVirtualMachines/openjdk-23/Contents/Home/bin/java",
"jvmArgs" : [
"--enable-preview",
"-javaagent:/Users/pivovarit/Applications/IntelliJ IDEA Ultimate.app/Contents/lib/idea_rt.jar=57563:/Users/pivovarit/Applications/IntelliJ IDEA Ultimate.app/Contents/bin",
"-Dfile.encoding=UTF-8",
"-Dsun.stdout.encoding=UTF-8",
"-Dsun.stderr.encoding=UTF-8"
],
"jdkVersion" : "23",
"vmName" : "OpenJDK 64-Bit Server VM",
"vmVersion" : "23+37-2369",
"warmupIterations" : 3,
"warmupTime" : "10 s",
"warmupBatchSize" : 1,
"measurementIterations" : 5,
"measurementTime" : "10 s",
"measurementBatchSize" : 1,
"primaryMetric" : {
"score" : 17.311580632178348,
"scoreError" : 0.3356105307010133,
"scoreConfidence" : [
16.975970101477333,
17.647191162879363
],
"scorePercentiles" : {
"0.0" : 17.158261528505264,
"50.0" : 17.33638831156214,
"90.0" : 17.372805550060715,
"95.0" : 17.372805550060715,
"99.0" : 17.372805550060715,
"99.9" : 17.372805550060715,
"99.99" : 17.372805550060715,
"99.999" : 17.372805550060715,
"99.9999" : 17.372805550060715,
"100.0" : 17.372805550060715
},
"scoreUnit" : "ops/s",
"rawData" : [
[
17.158261528505264,
17.33638831156214,
17.33402194561894,
17.356425825144683,
17.372805550060715
]
]
},
"secondaryMetrics" : {
}
}
]


0 comments on commit 6f822bf

Please sign in to comment.