From 98dbca742a2c10a2c0cd564fb0a00caafc009a89 Mon Sep 17 00:00:00 2001 From: artpar Date: Tue, 2 Jan 2024 19:15:47 +0530 Subject: [PATCH 01/18] memory mapped threads --- .../onebrc/CalculateAverage_artpar.java | 264 ++++++++++++++++++ 1 file changed, 264 insertions(+) create mode 100644 src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java b/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java new file mode 100644 index 000000000..862f73ac9 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java @@ -0,0 +1,264 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.FileInputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.channels.SeekableByteChannel; +import java.nio.charset.StandardCharsets; +import java.nio.file.*; +import java.time.Instant; +import java.util.*; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.stream.Collectors; + +public class CalculateAverage_artpar { + + public static final int N_THREADS = 8; + private static final String FILE = "./measurements.txt"; + + public static void main(String[] args) throws IOException { + + Path measurementFile = Paths.get(FILE); + OpenOption openOptions = StandardOpenOption.READ; + + long fileSize = Files.size(measurementFile); + + long expectedChunkSize = fileSize / N_THREADS; + + ExecutorService threadPool = Executors.newFixedThreadPool(N_THREADS); + + long chunkStartPosition = 0; + FileInputStream fis = new FileInputStream(measurementFile.toFile()); + List>> futures = new ArrayList<>(); + long bytesReadCurrent = 0; + + try (FileChannel fileChannel = FileChannel.open(measurementFile, StandardOpenOption.READ)) { + for (int i = 0; i < N_THREADS; i++) { + + long chunkSize = expectedChunkSize; + chunkSize = fis.skip(chunkSize); + + bytesReadCurrent += chunkSize; + while (((char) fis.read()) != '\n' && bytesReadCurrent < fileSize) { + chunkSize++; + bytesReadCurrent++; + } + + MappedByteBuffer mappedByteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, chunkStartPosition, + chunkSize); + ReaderRunnable readerRunnable = new ReaderRunnable(mappedByteBuffer, StandardOpenOption.READ, + chunkStartPosition, chunkSize); + Future> future = threadPool.submit(readerRunnable::run); + futures.add(future); + } + } + + Map globalMap = new HashMap<>(); + for (Future> future : futures) { + try { + Map stringMeasurementAggregatorMap = future.get(); + for (Map.Entry entry : stringMeasurementAggregatorMap.entrySet()) { + if (globalMap.containsKey(entry.getKey())) { + MeasurementAggregator value = entry.getValue(); + globalMap.get(entry.getKey()).combine(value); + } + else { + globalMap.put(entry.getKey(), entry.getValue()); + } + } + } + catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + } + + Map results = globalMap.entrySet().stream() + .map(e -> Map.entry(e.getKey(), e.getValue().finish())) + .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); + + threadPool.shutdown(); + Map measurements = new TreeMap<>(results); + + System.out.println(measurements); + } + + private record Measurement(String station, double value) { + } + + ; + + private record ResultRow(double min, double mean, double max) { + public String toString() { + return round(min) + "/" + round(mean) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + private static class MeasurementAggregator { + private double min = Double.POSITIVE_INFINITY; + private double max = Double.NEGATIVE_INFINITY; + private double sum; + private long count; + + void add(Measurement measurement) { + min = Math.min(min, measurement.value()); + max = Math.max(max, measurement.value()); + sum += measurement.value(); + count++; + } + + MeasurementAggregator combine(MeasurementAggregator other) { + min = Math.min(min, other.min); + max = Math.max(max, other.max); + sum += other.sum; + count += other.count; + return this; + } + + ResultRow finish() { + double mean = (count > 0) ? sum / count : 0; + return new ResultRow(min, mean, max); + } + } + + private static class ReaderRunnable { + private final MappedByteBuffer mappedByteBuffer; + private final OpenOption openOptions; + private final long chunkStartPosition; + private final long chunkSize; + private final Map results; + private Map nameStringMap = new HashMap(1024 * 1024); + private Map tempStringMap = new HashMap(1024 * 1024); + private Map hashToDouble = new HashMap<>(1024 * 1024); + + private ReaderRunnable(MappedByteBuffer mappedByteBuffer, OpenOption openOptions, long chunkStartPosition, long chunkSize) { + this.mappedByteBuffer = mappedByteBuffer; + this.openOptions = openOptions; + this.results = new HashMap<>(); + this.chunkStartPosition = chunkStartPosition; + this.chunkSize = chunkSize; + } + + public Map run() { + long start = Date.from(Instant.now()).getTime(); + SeekableByteChannel fileChannel = null; + int totalBytesRead = 0; + Map groupedMeasurements = new HashMap<>(); + + ByteBuffer nameBuffer = ByteBuffer.allocate(128); + String matchedStation = ""; + boolean readUntilSemiColon = true; + + while (mappedByteBuffer.hasRemaining()) { + byte b = mappedByteBuffer.get(); + + if (readUntilSemiColon) { + if ((byte) b != ';') { + nameBuffer.put(b); + continue; + } + else { + readUntilSemiColon = false; + matchedStation = getNameStringFromBufferUsingBuffer(nameBuffer); + continue; + } + } + + if (b != '\n') { + nameBuffer.put(b); + } + else { + readUntilSemiColon = true; + String tempValue = getTempStringFromBufferUsingBuffer(nameBuffer); + + int tempValueHashCode = tempValue.hashCode(); + if (!hashToDouble.containsKey(tempValueHashCode)) { + hashToDouble.put(tempValueHashCode, Double.parseDouble(tempValue)); + } + Double doubleValue = hashToDouble.get(tempValueHashCode); + + Measurement measurement = new Measurement(matchedStation, doubleValue); + groupedMeasurements.computeIfAbsent(measurement.station(), k -> new MeasurementAggregator()) + .add(measurement); + } + } + + long end = Date.from(Instant.now()).getTime(); + System.out.println("Took [" + ((end - start) / 1000) + "s for " + totalBytesRead / 1024 + " kb"); + + return groupedMeasurements; + } + + private String getNameStringFromBufferUsingBuffer(ByteBuffer nameBuffer) { + nameBuffer.flip(); + + byte[] array = nameBuffer.array(); + int length = nameBuffer.limit(); + int byteArrayHashCode = hashCode(array, 0, length); + nameBuffer.flip(); + nameBuffer.clear(); + + if (!nameStringMap.containsKey(byteArrayHashCode)) { + String value = new String(array, 0, length, StandardCharsets.UTF_8); + nameStringMap.put(byteArrayHashCode, value); + return value; + } + + return nameStringMap.get(byteArrayHashCode); + } + + private int hashCode(byte[] array, int start, int length) { + if (array == null) { + return 0; + } + + int result = 1; + for (int i = start; i < start + length; i++) { + result = 31 * result + array[i]; + } + + return result; + } + + private String getTempStringFromBufferUsingBuffer(ByteBuffer nameBuffer) { + nameBuffer.flip(); + + byte[] array = nameBuffer.array(); + int length = nameBuffer.limit(); + int byteArrayHashCode = hashCode(array, 0, length); + nameBuffer.flip(); + nameBuffer.clear(); + + if (!tempStringMap.containsKey(byteArrayHashCode)) { + String value = new String(array, 0, length, StandardCharsets.UTF_8); + tempStringMap.put(byteArrayHashCode, value); + return value; + } + + return tempStringMap.get(byteArrayHashCode); + } + } +} From 5f6cba357f304b2655dab800c5f77deed88ebb79 Mon Sep 17 00:00:00 2001 From: artpar Date: Tue, 2 Jan 2024 20:25:02 +0530 Subject: [PATCH 02/18] parallel collect cleanup commented code remove log comments print in the original format without total counts vector api parallel collect memory mapped threads --- calculate_average_artpar.sh | 20 ++ .../onebrc/CalculateAverage_artpar.java | 190 ++++++++++++++---- 2 files changed, 173 insertions(+), 37 deletions(-) create mode 100755 calculate_average_artpar.sh diff --git a/calculate_average_artpar.sh b/calculate_average_artpar.sh new file mode 100755 index 000000000..d1876af11 --- /dev/null +++ b/calculate_average_artpar.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_artpar diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java b/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java index 862f73ac9..bf9e6a2c8 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_artpar.java @@ -15,29 +15,33 @@ */ package dev.morling.onebrc; +import jdk.incubator.vector.DoubleVector; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; + import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; -import java.nio.channels.SeekableByteChannel; import java.nio.charset.StandardCharsets; import java.nio.file.*; import java.time.Instant; import java.util.*; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; +import java.util.concurrent.*; import java.util.stream.Collectors; public class CalculateAverage_artpar { public static final int N_THREADS = 8; private static final String FILE = "./measurements.txt"; + private static final Map nameStringMap = new ConcurrentHashMap<>(1024 * 1024); + private static final Map tempStringMap = new ConcurrentHashMap<>(1024 * 1024); + private static final Map hashToDouble = new ConcurrentHashMap<>(1024 * 1024); + private static final VectorSpecies SPECIES = DoubleVector.SPECIES_PREFERRED; public static void main(String[] args) throws IOException { - + long start = Instant.now().toEpochMilli(); Path measurementFile = Paths.get(FILE); OpenOption openOptions = StandardOpenOption.READ; @@ -64,50 +68,74 @@ public static void main(String[] args) throws IOException { bytesReadCurrent++; } + // System.out.println("[" + chunkStartPosition + "] - [" + (chunkStartPosition + chunkSize) + " bytes"); + if (chunkStartPosition + chunkSize >= fileSize) { + chunkSize = fileSize - chunkStartPosition; + } + MappedByteBuffer mappedByteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, chunkStartPosition, chunkSize); + ReaderRunnable readerRunnable = new ReaderRunnable(mappedByteBuffer, StandardOpenOption.READ, chunkStartPosition, chunkSize); Future> future = threadPool.submit(readerRunnable::run); futures.add(future); + chunkStartPosition = chunkStartPosition + chunkSize + 1; } } - Map globalMap = new HashMap<>(); - for (Future> future : futures) { - try { - Map stringMeasurementAggregatorMap = future.get(); - for (Map.Entry entry : stringMeasurementAggregatorMap.entrySet()) { - if (globalMap.containsKey(entry.getKey())) { - MeasurementAggregator value = entry.getValue(); - globalMap.get(entry.getKey()).combine(value); + Map globalMap = futures.parallelStream() + .flatMap(future -> { + try { + return future.get().entrySet().stream(); } - else { - globalMap.put(entry.getKey(), entry.getValue()); + catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); } - } - } - catch (InterruptedException | ExecutionException e) { - throw new RuntimeException(e); - } - } + }) + .collect(Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (existing, replacement) -> { + existing.combine(replacement); + return existing; + })); Map results = globalMap.entrySet().stream() - .map(e -> Map.entry(e.getKey(), e.getValue().finish())) + .parallel().map(e -> Map.entry(e.getKey(), e.getValue().finish())) .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); threadPool.shutdown(); Map measurements = new TreeMap<>(results); System.out.println(measurements); + // long end = Instant.now().toEpochMilli(); + // System.out.println((end - start) / 1000); + } - private record Measurement(String station, double value) { + public static double sum(double[] array) { + int i = 0; + double sum = 0.0; + + // Vectorized loop + for (; i < SPECIES.loopBound(array.length); i += SPECIES.length()) { + var v = DoubleVector.fromArray(SPECIES, array, i); + sum += v.reduceLanes(VectorOperators.ADD); + } + + // Scalar loop for remaining elements + for (; i < array.length; i++) { + sum += array[i]; + } + + return sum; } - ; + private record Measurement(String station, double value) { + } - private record ResultRow(double min, double mean, double max) { + private record ResultRow(double min, double mean, double max, long count) { public String toString() { return round(min) + "/" + round(mean) + "/" + round(max); } @@ -122,11 +150,27 @@ private static class MeasurementAggregator { private double max = Double.NEGATIVE_INFINITY; private double sum; private long count; + private String name; + + public MeasurementAggregator() { + } + + public MeasurementAggregator(double min, double max, double sum, long count) { + this.min = min; + this.max = max; + this.sum = sum; + this.count = count; + } + + public String getName() { + return name; + } void add(Measurement measurement) { min = Math.min(min, measurement.value()); max = Math.max(max, measurement.value()); sum += measurement.value(); + name = measurement.station; count++; } @@ -140,7 +184,7 @@ MeasurementAggregator combine(MeasurementAggregator other) { ResultRow finish() { double mean = (count > 0) ? sum / count : 0; - return new ResultRow(min, mean, max); + return new ResultRow(min, mean, max, count); } } @@ -150,9 +194,8 @@ private static class ReaderRunnable { private final long chunkStartPosition; private final long chunkSize; private final Map results; - private Map nameStringMap = new HashMap(1024 * 1024); - private Map tempStringMap = new HashMap(1024 * 1024); - private Map hashToDouble = new HashMap<>(1024 * 1024); + Map stationValueMap = new HashMap<>(); + Map stationIndexMap = new HashMap<>(); private ReaderRunnable(MappedByteBuffer mappedByteBuffer, OpenOption openOptions, long chunkStartPosition, long chunkSize) { this.mappedByteBuffer = mappedByteBuffer; @@ -164,17 +207,18 @@ private ReaderRunnable(MappedByteBuffer mappedByteBuffer, OpenOption openOptions public Map run() { long start = Date.from(Instant.now()).getTime(); - SeekableByteChannel fileChannel = null; int totalBytesRead = 0; Map groupedMeasurements = new HashMap<>(); + final int VECTOR_SIZE = 512; + final int VECTOR_SIZE_1 = VECTOR_SIZE - 1; ByteBuffer nameBuffer = ByteBuffer.allocate(128); String matchedStation = ""; boolean readUntilSemiColon = true; while (mappedByteBuffer.hasRemaining()) { byte b = mappedByteBuffer.get(); - + totalBytesRead++; if (readUntilSemiColon) { if ((byte) b != ';') { nameBuffer.put(b); @@ -198,16 +242,88 @@ public Map run() { if (!hashToDouble.containsKey(tempValueHashCode)) { hashToDouble.put(tempValueHashCode, Double.parseDouble(tempValue)); } - Double doubleValue = hashToDouble.get(tempValueHashCode); + double doubleValue = hashToDouble.get(tempValueHashCode); + + // Measurement measurement = new Measurement(matchedStation, doubleValue); + double[] array = stationValueMap.computeIfAbsent(matchedStation, (k) -> { + stationIndexMap.put(k, 0); + return new double[VECTOR_SIZE]; + }); + Integer index = stationIndexMap.get(matchedStation); + array[index] = doubleValue; + if (index == VECTOR_SIZE_1) { + + int i = 0; + double min = Double.POSITIVE_INFINITY; + double max = Double.NEGATIVE_INFINITY; + double sum = 0; + long count = 0; + for (; i < SPECIES.loopBound(array.length); i += SPECIES.length()) { + // Vector operations + DoubleVector vector = DoubleVector.fromArray(SPECIES, array, i); + min = Math.min(min, vector.reduceLanes(VectorOperators.MIN)); + max = Math.max(max, vector.reduceLanes(VectorOperators.MAX)); + sum += vector.reduceLanes(VectorOperators.ADD); + count += vector.length(); + } + + // MeasurementAggregator ma = new MeasurementAggregator(min, max, sum, VECTOR_SIZE); + // groupedMeasurements.computeIfAbsent(matchedStation, k -> new MeasurementAggregator()) + // .combine(ma); + + int remainingCount = array.length - i; + for (; i < array.length; i++) { + min = Math.min(min, array[i]); + max = Math.max(max, array[i]); + sum += array[i]; + count++; + } + MeasurementAggregator ma = new MeasurementAggregator(min, max, sum, count); + // System.out.println("Sum ma [" + ma + "]"); + groupedMeasurements.computeIfAbsent(matchedStation, k -> new MeasurementAggregator()) + .combine(ma); + + stationIndexMap.put(matchedStation, 0); + continue; + } + stationIndexMap.put(matchedStation, index + 1); + } + } + + VectorSpecies species = DoubleVector.SPECIES_PREFERRED; + for (String stationName : stationIndexMap.keySet()) { - Measurement measurement = new Measurement(matchedStation, doubleValue); - groupedMeasurements.computeIfAbsent(measurement.station(), k -> new MeasurementAggregator()) - .add(measurement); + Integer count = stationIndexMap.get(stationName); + if (count < 1) { + continue; } + else if (count == 1) { + double[] array = stationValueMap.get(stationName); + double val = array[0]; + MeasurementAggregator ma = new MeasurementAggregator(val, val, val, 1); + groupedMeasurements.computeIfAbsent(stationName, k -> new MeasurementAggregator()) + .combine(ma); + } + else { + double[] array = stationValueMap.get(stationName); + double[] subArray = new double[count]; + System.arraycopy(array, 0, subArray, 0, count); + // Creating a DoubleVector from the array + // System.out.println("Create vector from [" + count + "] -> " + subArray.length); + DoubleVector doubleVector = DoubleVector.fromArray(species, subArray, 0); + double min = doubleVector.reduceLanes(VectorOperators.MIN); + double max = doubleVector.reduceLanes(VectorOperators.MAX); + double sum = doubleVector.reduceLanes(VectorOperators.ADD); + MeasurementAggregator ma = new MeasurementAggregator(min, max, sum, count); + groupedMeasurements.computeIfAbsent(stationName, k -> new MeasurementAggregator()) + .combine(ma); + } + + stationIndexMap.put(matchedStation, 0); } long end = Date.from(Instant.now()).getTime(); - System.out.println("Took [" + ((end - start) / 1000) + "s for " + totalBytesRead / 1024 + " kb"); + // System.out.println("Took [" + ((end - start) / 1000) + "s for " + totalBytesRead / 1024 + " kb"); return groupedMeasurements; } From 97e22b7e2fb9808f324878cac67efba57f5eb2df Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 10:38:07 +0100 Subject: [PATCH 03/18] Update README.md --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 06d16489c..8c7f3915b 100644 --- a/README.md +++ b/README.md @@ -146,7 +146,8 @@ _Q: Why_ 1️⃣🐝🏎️ _?_\ A: It's the abbreviation of the project name: **One** **B**illion **R**ow **C**hallenge. _Q: Can I make assumptions on the names of the weather stations showing up in the data set?_\ -A: No, while only a fixed set of station names is used by the data set generator, any solution should work with arbitrary UTF-8 station names. +A: No, while only a fixed set of station names is used by the data set generator, any solution should work with arbitrary UTF-8 station names +(for the sake of simplicity, names are guaranteed to contain no `;` character). ## License From 9c13459981f7e323286f57b72267f72e4a821fd9 Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 11:09:44 +0100 Subject: [PATCH 04/18] FAQ additions --- README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8c7f3915b..702dbed4a 100644 --- a/README.md +++ b/README.md @@ -142,13 +142,19 @@ A: No, this challenge is focussed on Java only. Feel free to inofficially share _Q: Can I use non-JVM languages and/or tools?_\ A: No, this challenge is focussed on Java only. Feel free to inofficially share interesting implementations and results though. For instance it would be interesting to see how DuckDB fares with this task. -_Q: Why_ 1️⃣🐝🏎️ _?_\ -A: It's the abbreviation of the project name: **One** **B**illion **R**ow **C**hallenge. +_Q: Can I use JNI?_\ +A: Submissions must be implemented in Java; JNI requires glue code written in C/C++, so it cannot be used. You could use AOT compilation of Java code via GraalVM though, either by AOT-compiling the entire application, or by creating a native library which you then invoke using the new Java FFI (https://openjdk.org/jeps/442[JEP 442]). + +_Q: What is the encoding of the measurements.txt file?_\ +A: The file is encoded with UTF-8. _Q: Can I make assumptions on the names of the weather stations showing up in the data set?_\ A: No, while only a fixed set of station names is used by the data set generator, any solution should work with arbitrary UTF-8 station names (for the sake of simplicity, names are guaranteed to contain no `;` character). +_Q: Why_ 1️⃣🐝🏎️ _?_\ +A: It's the abbreviation of the project name: **One** **B**illion **R**ow **C**hallenge. + ## License This code base is available under the Apache License, version 2. From f6c3f2b540ae9542e707ae185f61e6f3d04c19a4 Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 18:24:02 +0100 Subject: [PATCH 05/18] Clarifying allowed builds and expanding FAQ --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 702dbed4a..0b7a53300 100644 --- a/README.md +++ b/README.md @@ -86,7 +86,10 @@ Execute the following steps to run the challenge: The following rules and limits apply: -* Any Java distribution provided by [SDKMan](https://sdkman.io/jdks) as well as early access builds available on openjdk.net may be used (including EA builds for OpenJDK projects like Valhalla). +* Any of these Java distributions may be used: + * Any builds provided by [SDKMan](https://sdkman.io/jdks) + * Early access builds available on openjdk.net may be used (including EA builds for OpenJDK projects like Valhalla) + * Builds on [builds.shipilev.net](https://builds.shipilev.net/openjdk-jdk-lilliput/) If you want to use a build not available via these channels, reach out to discuss whether it can be considered. * No external library dependencies may be used * Implementations must be provided as a single source file @@ -152,6 +155,9 @@ _Q: Can I make assumptions on the names of the weather stations showing up in th A: No, while only a fixed set of station names is used by the data set generator, any solution should work with arbitrary UTF-8 station names (for the sake of simplicity, names are guaranteed to contain no `;` character). +_Q: Can I copy code from other submissions?_ +A: Yes, you can. The primary focus of the challenge is about learning something new, rather than "winning". When you do so, please give credit to the relevant source submissions. Please don't re-submit other entries with no or only trivial improvements. + _Q: Why_ 1️⃣🐝🏎️ _?_\ A: It's the abbreviation of the project name: **One** **B**illion **R**ow **C**hallenge. From 8c8d89119587a2cf705efa2d78d2dd967b24fa08 Mon Sep 17 00:00:00 2001 From: Adam Leskis Date: Tue, 2 Jan 2024 15:22:03 +0000 Subject: [PATCH 06/18] fix simple typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0b7a53300..91db6deb6 100644 --- a/README.md +++ b/README.md @@ -108,7 +108,7 @@ To submit your own implementation to 1BRC, follow these steps: * (Optional) If you'd like to use native binaries (GraalVM), adjust the _pom.xml_ file so that it builds that binary. * Create a pull request against the upstream repository, clearly stating * The name of your implementation class. - * The JDK build to use (of not specified, the latest OpenJDK 21 upstream build will be used). + * The JDK build to use (if not specified, the latest OpenJDK 21 upstream build will be used). * The execution time of the program on your system and specs of the same (CPU, number of cores, RAM). This is for informative purposes only, the official runtime will be determined as described below. * I will run the program and determine its performance as described in the next section, and enter the result to the scoreboard. From 6c9556ff6f869f61a21123401ce3be8fb5ffac01 Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 19:35:54 +0100 Subject: [PATCH 07/18] Updating evaluation scripts --- evaluate.sh | 9 ++++++++- prepare_evaluation.sh | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100755 prepare_evaluation.sh diff --git a/evaluate.sh b/evaluate.sh index 78b418385..7c8aa82ea 100755 --- a/evaluate.sh +++ b/evaluate.sh @@ -15,8 +15,15 @@ # limitations under the License. # +if [ -z "$1" ] + then + echo "Usage: evaluate.sh " + exit 1 +fi + +mvn clean verify for i in {1..5} do - time ./calculate_average.sh + time ./calculate_average_$1.sh done diff --git a/prepare_evaluation.sh b/prepare_evaluation.sh new file mode 100755 index 000000000..250279ff8 --- /dev/null +++ b/prepare_evaluation.sh @@ -0,0 +1,28 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +if [ -z "$1" ] + then + echo "Usage: prepare_evaluation.sh " + exit 1 +fi + + +git checkout -b $1 + +git pull https://github.com/$1/1brc.git +# git pull git@github.com:$1/1brc.git From f76461fc0104a8574b3c57e8f026234df2670137 Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 19:59:37 +0100 Subject: [PATCH 08/18] Only one `time` --- evaluate.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evaluate.sh b/evaluate.sh index 7c8aa82ea..35b417a1f 100755 --- a/evaluate.sh +++ b/evaluate.sh @@ -25,5 +25,5 @@ mvn clean verify for i in {1..5} do - time ./calculate_average_$1.sh + ./calculate_average_$1.sh done From 9696a3e29351e191e42ad0cde3a6899e1b1920fc Mon Sep 17 00:00:00 2001 From: itaske <39966884+itaske@users.noreply.github.com> Date: Tue, 2 Jan 2024 20:01:33 +0100 Subject: [PATCH 09/18] Adding itaske's implementation --- calculate_average_patrick.sh | 20 +++++ .../onebrc/CalculateAverage_patrick.java | 76 +++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 calculate_average_patrick.sh create mode 100644 src/main/java/dev/morling/onebrc/CalculateAverage_patrick.java diff --git a/calculate_average_patrick.sh b/calculate_average_patrick.sh new file mode 100644 index 000000000..25f725f4e --- /dev/null +++ b/calculate_average_patrick.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_patrick diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_patrick.java b/src/main/java/dev/morling/onebrc/CalculateAverage_patrick.java new file mode 100644 index 000000000..298a43745 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_patrick.java @@ -0,0 +1,76 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.AbstractMap; +import java.util.Comparator; +import java.util.Map; +import java.util.concurrent.ConcurrentSkipListMap; +import java.util.stream.Collectors; + +public class CalculateAverage_patrick { + + private static final String FILE = "./measurements.txt"; + + private record Measurement(long count, double sum, double min, double max) { + + Measurement(double value) { + this(1, value, value, value); + } + + + public String toString() { + StringBuilder builder = new StringBuilder(); + builder.append(round(min)); + builder.append("/"); + builder.append(round(sum/count)); + builder.append("/"); + builder.append(round(max)); + + return builder.toString(); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + public static void main(String[] args) throws IOException { + + Map resultMap = Files.lines(Path.of(FILE)).parallel() + .map(line -> { + int separatorIndex = line.indexOf(";"); + String key = line.substring(0, separatorIndex); + double value = Double.parseDouble(line.substring(separatorIndex + 1)); + return new AbstractMap.SimpleEntry<>(key, value); + }) + .collect(Collectors.toConcurrentMap( + entry -> entry.getKey(), + entry -> new Measurement(entry.getValue()), + ((measurement1, measurement2) -> new Measurement( + measurement1.count + measurement2.count, + measurement1.sum + measurement2.sum, + Math.min(measurement1.min, measurement2.min), + Math.max(measurement1.max, measurement2.max))))); + + System.out.print( + resultMap.entrySet().stream().sorted(Map.Entry.comparingByKey()).map(Object::toString).collect(Collectors.joining(", ", "{", "}"))); + + } +} From 7011e3e460ea2c87522f99d317b6c6105d494483 Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 20:07:39 +0100 Subject: [PATCH 10/18] Evaluating itaske's submission --- README.md | 5 +++-- calculate_average_patrick.sh => calculate_average_itaske.sh | 0 ...lateAverage_patrick.java => CalculateAverage_itaske.java} | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) rename calculate_average_patrick.sh => calculate_average_itaske.sh (100%) mode change 100644 => 100755 rename src/main/java/dev/morling/onebrc/{CalculateAverage_patrick.java => CalculateAverage_itaske.java} (98%) diff --git a/README.md b/README.md index 91db6deb6..6d8ab4ec4 100644 --- a/README.md +++ b/README.md @@ -37,8 +37,9 @@ Submit your implementation by Jan 31 2024 and become part of the leaderboard! | # | Result (m:s:ms) | Implementation | Submitter | |---|-----------------|--------------------|---------------| -| 1.| 02:08.845| [CalculateAverage_royvanrijn.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| Roy van Rijn| -| 2.| 04:13.449| [CalculateAverage.java](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| Gunnar Morling| +| 1.| 02:08.845| [CalculateAverage_itaske.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| +| 2.| 02:08.845| [CalculateAverage_royvanrijn.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| [Roy van Rijn](https://github.com/royvanrijn)| +| 3.| 04:13.449| [CalculateAverage.java](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| [Gunnar Morling](https://github.com/gunnarmorling)| See [below](#entering-the-challenge) for instructions how to enter the challenge with your own implementation. diff --git a/calculate_average_patrick.sh b/calculate_average_itaske.sh old mode 100644 new mode 100755 similarity index 100% rename from calculate_average_patrick.sh rename to calculate_average_itaske.sh diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_patrick.java b/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java similarity index 98% rename from src/main/java/dev/morling/onebrc/CalculateAverage_patrick.java rename to src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java index 298a43745..9e137012f 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_patrick.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java @@ -24,7 +24,7 @@ import java.util.concurrent.ConcurrentSkipListMap; import java.util.stream.Collectors; -public class CalculateAverage_patrick { +public class CalculateAverage_itaske { private static final String FILE = "./measurements.txt"; From 4de980f6d9e9ac5c11e9c056628b807a1f222946 Mon Sep 17 00:00:00 2001 From: Keshavram Kuduwa Date: Tue, 2 Jan 2024 15:31:18 +0530 Subject: [PATCH 11/18] Keshavram Kuduwa's Submission --- calculate_average_kuduwa.sh | 20 +++++ .../onebrc/CalculateAverage_kuduwa.java | 79 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100755 calculate_average_kuduwa.sh create mode 100644 src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa.java diff --git a/calculate_average_kuduwa.sh b/calculate_average_kuduwa.sh new file mode 100755 index 000000000..a5ce4c275 --- /dev/null +++ b/calculate_average_kuduwa.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_kuduwa diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa.java b/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa.java new file mode 100644 index 000000000..98a3adab9 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa.java @@ -0,0 +1,79 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.stream.Collectors; + +public class CalculateAverage_kuduwa { + + private static final String FILE = "./measurements.txt"; + + private record Measurement(double min, double max, double sum, long count) { + + Measurement(double initialMeasurement) { + this(initialMeasurement, initialMeasurement, initialMeasurement, 1); + } + + public static Measurement combineWith(Measurement m1, Measurement m2) { + return new Measurement( + m1.min < m2.min ? m1.min : m2.min, + m1.max > m2.max ? m1.max : m2.max, + m1.sum + m2.sum, + m1.count + m2.count); + } + + public String toString() { + return round(min) + "/" + round(sum / count) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + public static void main(String[] args) throws IOException { + // long before = System.currentTimeMillis(); + Map resultMap = new ConcurrentHashMap<>(); + Files.lines(Path.of(FILE)) + .parallel() + .forEach( + line -> { + int pivot = line.indexOf(";"); + String key = line.substring(0, pivot); + Measurement measured = new Measurement(Double.parseDouble(line.substring(pivot + 1))); + Measurement existingMeasurement = resultMap.get(key); + if (existingMeasurement != null) { + resultMap.put(key, Measurement.combineWith(existingMeasurement, measured)); + } + else { + resultMap.put(key, measured); + } + }); + System.out.print("{"); + System.out.print( + resultMap.entrySet().stream() + .map(Object::toString) + .sorted() + .collect(Collectors.joining(", "))); + System.out.println("}"); + // System.out.println("Took: " + (System.currentTimeMillis() - before)); + } +} From f5535d903aec58580dc469932ef5a6b50651e106 Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 20:30:41 +0100 Subject: [PATCH 12/18] Evaluating Kuduwa Keshavram's submission --- README.md | 3 ++- ..._average_kuduwa.sh => calculate_average_kuduwa-keshavram.sh | 2 +- ...rage_kuduwa.java => CalculateAverage_kuduwa_keshavram.java} | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) rename calculate_average_kuduwa.sh => calculate_average_kuduwa-keshavram.sh (92%) rename src/main/java/dev/morling/onebrc/{CalculateAverage_kuduwa.java => CalculateAverage_kuduwa_keshavram.java} (98%) diff --git a/README.md b/README.md index 6d8ab4ec4..2b1cb16a2 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,8 @@ Submit your implementation by Jan 31 2024 and become part of the leaderboard! | # | Result (m:s:ms) | Implementation | Submitter | |---|-----------------|--------------------|---------------| -| 1.| 02:08.845| [CalculateAverage_itaske.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| +| 1.| 02:08.315| [CalculateAverage_itaske.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| +| 2.| 02:08.650| [CalculateAverage_kuduwa_keshavram.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java)| [Kuduwa Keshavram](https://github.com/kuduwa_keshavram)| | 2.| 02:08.845| [CalculateAverage_royvanrijn.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| [Roy van Rijn](https://github.com/royvanrijn)| | 3.| 04:13.449| [CalculateAverage.java](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| [Gunnar Morling](https://github.com/gunnarmorling)| diff --git a/calculate_average_kuduwa.sh b/calculate_average_kuduwa-keshavram.sh similarity index 92% rename from calculate_average_kuduwa.sh rename to calculate_average_kuduwa-keshavram.sh index a5ce4c275..369deebd7 100755 --- a/calculate_average_kuduwa.sh +++ b/calculate_average_kuduwa-keshavram.sh @@ -17,4 +17,4 @@ JAVA_OPTS="" -time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_kuduwa +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_kuduwa_keshavram diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa.java b/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java similarity index 98% rename from src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa.java rename to src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java index 98a3adab9..49c77c0a1 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java @@ -22,7 +22,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; -public class CalculateAverage_kuduwa { +public class CalculateAverage_kuduwa_keshavram { private static final String FILE = "./measurements.txt"; From 7a61bdf7c58891503f7dcd95cf799ee0ebbd268b Mon Sep 17 00:00:00 2001 From: Hampus Ram Date: Tue, 2 Jan 2024 14:04:29 +0100 Subject: [PATCH 13/18] Implementation using memory mapped file --- calculate_average_bjhara.sh | 20 +++ .../onebrc/CalculateAverage_bjhara.java | 151 ++++++++++++++++++ 2 files changed, 171 insertions(+) create mode 100644 calculate_average_bjhara.sh create mode 100644 src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java diff --git a/calculate_average_bjhara.sh b/calculate_average_bjhara.sh new file mode 100644 index 000000000..474ec22e1 --- /dev/null +++ b/calculate_average_bjhara.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_bjhara diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java b/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java new file mode 100644 index 000000000..0cc1cb79d --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java @@ -0,0 +1,151 @@ +package dev.morling.onebrc; + +import java.io.*; +import java.nio.*; +import java.nio.channels.*; +import java.nio.file.*; +import java.util.*; +import java.util.stream.*; + +public class CalculateAverage_bjhara { + private static final String FILE = "./measurements.txt"; + + private static class Measurement { + private double min = Double.POSITIVE_INFINITY; + private double max = Double.NEGATIVE_INFINITY; + private double sum; + private long count; + + public String toString() { + return round(min) + "/" + round(sum / count) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + + public static Measurement combine(Measurement m1, Measurement m2) { + var mres = new Measurement(); + mres.min = m1.min < m2.min ? m1.min : m2.min; + mres.max = m1.max > m2.max ? m1.max : m2.max; + mres.sum = m1.sum + m2.sum; + mres.count = m1.count + m2.count; + return mres; + } + } + + public static void main(String[] args) throws IOException { + try (FileChannel fileChannel = (FileChannel) Files.newByteChannel(Path.of(FILE), + EnumSet.of(StandardOpenOption.READ))) { + + var cities = splitFileChannel(fileChannel) + .parallel() + .map(CalculateAverage_bjhara::parseBuffer) + .collect(Collectors.reducing(CalculateAverage_bjhara::combineMaps)); + + var sortedCities = new TreeMap<>(cities.orElseThrow()); + System.out.println(sortedCities); + } + } + + private static Map combineMaps(Map map1, + Map map2) { + for (var entry : map2.entrySet()) { + map1.merge(entry.getKey(), entry.getValue(), Measurement::combine); + } + + return map1; + } + + private static Stream splitFileChannel(final FileChannel fileChannel) throws IOException { + return StreamSupport.stream(Spliterators.spliteratorUnknownSize(new Iterator() { + private static final long CHUNK_SIZE = 1024 * 1024 * 10L; + + private final long size = fileChannel.size(); + private long start = 0; + + @Override + public boolean hasNext() { + return start < size; + } + + @Override + public ByteBuffer next() { + try { + MappedByteBuffer mappedByteBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, start, + Math.min(CHUNK_SIZE, size - start)); + + // don't split the data in the middle of lines + // find the closest previous newline + int realEnd = mappedByteBuffer.limit() - 1; + while (mappedByteBuffer.get(realEnd) != '\n') + realEnd--; + + realEnd++; + + mappedByteBuffer.limit(realEnd); + start += realEnd; + + return mappedByteBuffer; + } + catch (IOException ex) { + throw new UncheckedIOException(ex); + } + } + }, Spliterator.IMMUTABLE), false); + } + + private static Map parseBuffer(ByteBuffer bb) { + Map cities = new HashMap<>(); + + final int limit = bb.limit(); + final byte[] buffer = new byte[128]; + + while (bb.position() < limit) { + final int currentPosition = bb.position(); + + // find the ; separator + int separator = currentPosition; + while (separator != limit && bb.get(separator) != ';') + separator++; + + // find the end of the line + int end = separator + 1; + while (end != limit && !Character.isWhitespace((char) bb.get(end))) + end++; + + // get the name as a string + int nameLength = separator - currentPosition; + bb.get(buffer, 0, nameLength); + String city = new String(buffer, 0, nameLength); + + // get rid of the separator + bb.get(); + + // get the double value + int valueLength = end - separator - 1; + bb.get(buffer, 0, valueLength); + String valueStr = new String(buffer, 0, valueLength); + double value = Double.parseDouble(valueStr); + + // and get rid of the new line (handle both kinds) + byte newline = bb.get(); + if (newline == '\r') + bb.get(); + + // update the map with the new measurement + Measurement agg = cities.get(city); + if (agg == null) { + agg = new Measurement(); + cities.put(city, agg); + } + + agg.min = agg.min < value ? agg.min : value; + agg.max = agg.max > value ? agg.max : value; + agg.sum += value; + agg.count++; + } + + return cities; + } +} From 80bbdaba8cdd6dee2d894c0410b66947a9fd207f Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 20:43:14 +0100 Subject: [PATCH 14/18] Evaluating bjhara's entry --- README.md | 1 + calculate_average_bjhara.sh | 0 .../morling/onebrc/CalculateAverage_bjhara.java | 15 +++++++++++++++ 3 files changed, 16 insertions(+) mode change 100644 => 100755 calculate_average_bjhara.sh diff --git a/README.md b/README.md index 2b1cb16a2..23d17679e 100644 --- a/README.md +++ b/README.md @@ -37,6 +37,7 @@ Submit your implementation by Jan 31 2024 and become part of the leaderboard! | # | Result (m:s:ms) | Implementation | Submitter | |---|-----------------|--------------------|---------------| +| 1.| 00:38.510| [CalculateAverage_bjhara.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java)| [Hampus Ram](https://github.com/bjhara)| | 1.| 02:08.315| [CalculateAverage_itaske.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| | 2.| 02:08.650| [CalculateAverage_kuduwa_keshavram.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java)| [Kuduwa Keshavram](https://github.com/kuduwa_keshavram)| | 2.| 02:08.845| [CalculateAverage_royvanrijn.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| [Roy van Rijn](https://github.com/royvanrijn)| diff --git a/calculate_average_bjhara.sh b/calculate_average_bjhara.sh old mode 100644 new mode 100755 diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java b/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java index 0cc1cb79d..8296db73d 100644 --- a/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java @@ -1,3 +1,18 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ package dev.morling.onebrc; import java.io.*; From 5d0036704c1c66d510ba111b150c0fc89a9cf16a Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 20:46:33 +0100 Subject: [PATCH 15/18] Numbering --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 23d17679e..160e82cee 100644 --- a/README.md +++ b/README.md @@ -38,10 +38,10 @@ Submit your implementation by Jan 31 2024 and become part of the leaderboard! | # | Result (m:s:ms) | Implementation | Submitter | |---|-----------------|--------------------|---------------| | 1.| 00:38.510| [CalculateAverage_bjhara.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java)| [Hampus Ram](https://github.com/bjhara)| -| 1.| 02:08.315| [CalculateAverage_itaske.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| -| 2.| 02:08.650| [CalculateAverage_kuduwa_keshavram.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java)| [Kuduwa Keshavram](https://github.com/kuduwa_keshavram)| -| 2.| 02:08.845| [CalculateAverage_royvanrijn.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| [Roy van Rijn](https://github.com/royvanrijn)| -| 3.| 04:13.449| [CalculateAverage.java](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| [Gunnar Morling](https://github.com/gunnarmorling)| +| 2.| 02:08.315| [CalculateAverage_itaske.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| +| 3.| 02:08.650| [CalculateAverage_kuduwa_keshavram.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java)| [Kuduwa Keshavram](https://github.com/kuduwa_keshavram)| +| 4.| 02:08.845| [CalculateAverage_royvanrijn.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| [Roy van Rijn](https://github.com/royvanrijn)| +| 5.| 04:13.449| [CalculateAverage.java](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| [Gunnar Morling](https://github.com/gunnarmorling)| See [below](#entering-the-challenge) for instructions how to enter the challenge with your own implementation. From 8b5702ef54d0d753b6ac8e9f3d13ab0631e9f30d Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 21:01:58 +0100 Subject: [PATCH 16/18] Evaluating Roy's updated entry --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 160e82cee..0ef0140ef 100644 --- a/README.md +++ b/README.md @@ -37,11 +37,11 @@ Submit your implementation by Jan 31 2024 and become part of the leaderboard! | # | Result (m:s:ms) | Implementation | Submitter | |---|-----------------|--------------------|---------------| -| 1.| 00:38.510| [CalculateAverage_bjhara.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java)| [Hampus Ram](https://github.com/bjhara)| -| 2.| 02:08.315| [CalculateAverage_itaske.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| -| 3.| 02:08.650| [CalculateAverage_kuduwa_keshavram.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java)| [Kuduwa Keshavram](https://github.com/kuduwa_keshavram)| -| 4.| 02:08.845| [CalculateAverage_royvanrijn.java](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_royvanrijn.java)| [Roy van Rijn](https://github.com/royvanrijn)| -| 5.| 04:13.449| [CalculateAverage.java](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| [Gunnar Morling](https://github.com/gunnarmorling)| +| 1.| 00:23.366| [link](https://github.com/gunnarmorling/1brc/pull/5/)| [Roy van Rijn](https://github.com/royvanrijn)| +| 2.| 00:38.510| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java)| [Hampus Ram](https://github.com/bjhara)| +| 3.| 02:08.315| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| +| 4.| 02:08.650| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java)| [Kuduwa Keshavram](https://github.com/kuduwa_keshavram)| +| 5.| 04:13.449| [link](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| [Gunnar Morling](https://github.com/gunnarmorling)| See [below](#entering-the-challenge) for instructions how to enter the challenge with your own implementation. From d6f1cf116bf9f5ece9b29280b5059552ce82bb1f Mon Sep 17 00:00:00 2001 From: Aurelian Tutuianu Date: Tue, 2 Jan 2024 21:14:32 +0200 Subject: [PATCH 17/18] - implementation by padreati --- calculate_average_padreati.sh | 20 ++ pom.xml | 5 + .../onebrc/CalculateAverage_padreati.java | 197 ++++++++++++++++++ 3 files changed, 222 insertions(+) create mode 100755 calculate_average_padreati.sh create mode 100644 src/main/java/dev/morling/onebrc/CalculateAverage_padreati.java diff --git a/calculate_average_padreati.sh b/calculate_average_padreati.sh new file mode 100755 index 000000000..c35e7a55f --- /dev/null +++ b/calculate_average_padreati.sh @@ -0,0 +1,20 @@ +#!/bin/sh +# +# Copyright 2023 The original authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +JAVA_OPTS="--enable-preview --add-modules jdk.incubator.vector" +time java $JAVA_OPTS --class-path target/average-1.0.0-SNAPSHOT.jar dev.morling.onebrc.CalculateAverage_padreati diff --git a/pom.xml b/pom.xml index 401e8260c..f54d1f195 100644 --- a/pom.xml +++ b/pom.xml @@ -103,6 +103,11 @@ 3.8.1 true + + --enable-preview + --add-modules + java.base,jdk.incubator.vector + diff --git a/src/main/java/dev/morling/onebrc/CalculateAverage_padreati.java b/src/main/java/dev/morling/onebrc/CalculateAverage_padreati.java new file mode 100644 index 000000000..1a3273224 --- /dev/null +++ b/src/main/java/dev/morling/onebrc/CalculateAverage_padreati.java @@ -0,0 +1,197 @@ +/* + * Copyright 2023 The original authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package dev.morling.onebrc; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.StructuredTaskScope; + +import jdk.incubator.vector.ByteVector; +import jdk.incubator.vector.VectorOperators; +import jdk.incubator.vector.VectorSpecies; + +public class CalculateAverage_padreati { + + private static final VectorSpecies species = ByteVector.SPECIES_PREFERRED; + private static final String FILE = "./measurements.txt"; + private static final int CHUNK_SIZE = 1024 * 1024; + + private record ResultRow(double min, double mean, double max) { + public String toString() { + return round(min) + "/" + round(mean) + "/" + round(max); + } + + private double round(double value) { + return Math.round(value * 10.0) / 10.0; + } + } + + private record MeasurementAggregator(double min, double max, double sum, long count) { + + public MeasurementAggregator(double seed) { + this(seed, seed, seed, 1); + } + + public MeasurementAggregator merge(MeasurementAggregator b) { + return new MeasurementAggregator( + Math.min(min, b.min), + Math.max(max, b.max), + sum + b.sum, + count + b.count + ); + } + + public ResultRow toResultRow() { + return new ResultRow(min, sum / count, max); + } + } + + public static void main(String[] args) throws IOException { + new CalculateAverage_padreati().run(); + } + + private void run() throws IOException { + File file = new File(FILE); + var splits = findFileSplits(); + List>> subtasks = new ArrayList<>(); + try (var scope = new StructuredTaskScope.ShutdownOnFailure()) { + for (int i = 0; i < splits.size(); i++) { + long splitStart = splits.get(i); + long splitEnd = i < splits.size() - 1 ? splits.get(i + 1) : file.length() + 1; + subtasks.add(scope.fork(() -> chunkProcessor(file, splitStart, splitEnd))); + } + scope.join(); + scope.throwIfFailed(); + + var resultList = subtasks.stream().map(StructuredTaskScope.Subtask::get).toList(); + TreeMap measurements = collapseResults(resultList); + System.out.println(measurements); + + } + catch (InterruptedException | ExecutionException e) { + throw new RuntimeException(e); + } + } + + private List findFileSplits() throws IOException { + var splits = new ArrayList(); + splits.add(0L); + + File file = new File(FILE); + long next = CHUNK_SIZE; + while (true) { + if (next >= file.length()) { + break; + } + try (FileInputStream fis = new FileInputStream(file)) { + long skip = fis.skip(next); + if (skip != next) { + throw new RuntimeException(); + } + // find first new line + while (true) { + int ch = fis.read(); + if (ch != '\n') { + next++; + continue; + } + break; + } + // skip eventual \\r + if (fis.read() == '\r') { + next++; + } + splits.add(next + 1); + next += CHUNK_SIZE; + } + } + return splits; + } + + public Map chunkProcessor(File source, long start, long end) throws IOException { + var map = new HashMap(); + byte[] buffer = new byte[(int) (end - start)]; + int len; + try (FileInputStream bis = new FileInputStream(source)) { + bis.skip(start); + len = bis.read(buffer, 0, buffer.length); + } + + List nlIndexes = new ArrayList<>(); + List commaIndexes = new ArrayList<>(); + + int loopBound = species.loopBound(len); + int i = 0; + + for (; i < loopBound; i += species.length()) { + ByteVector v = ByteVector.fromArray(species, buffer, i); + var mask = v.compare(VectorOperators.EQ, '\n'); + for (int j = 0; j < species.length(); j++) { + if (mask.laneIsSet(j)) { + nlIndexes.add(i + j); + } + } + mask = v.compare(VectorOperators.EQ, ';'); + for (int j = 0; j < species.length(); j++) { + if (mask.laneIsSet(j)) { + commaIndexes.add(i + j); + } + } + } + for (; i < len; i++) { + if (buffer[i] == '\n') { + nlIndexes.add(i); + } + if (buffer[i] == ';') { + commaIndexes.add(i); + } + } + + int startLine = 0; + for (int j = 0; j < nlIndexes.size(); j++) { + int endLine = nlIndexes.get(j); + int commaIndex = commaIndexes.get(j); + String key = new String(buffer, startLine, commaIndex - startLine); + double value = Double.parseDouble(new String(buffer, commaIndex + 1, endLine - commaIndex - 1)); + map.merge(key, new MeasurementAggregator(value), MeasurementAggregator::merge); + startLine = endLine + 1; + } + return map; + } + + private TreeMap collapseResults(List> resultList) { + HashMap aggregate = new HashMap<>(); + for (var map : resultList) { + for (var entry : map.entrySet()) { + aggregate.merge(entry.getKey(), entry.getValue(), MeasurementAggregator::merge); + } + } + TreeMap measurements = new TreeMap<>(); + for (var entry : aggregate.entrySet()) { + measurements.put(entry.getKey(), entry.getValue().toResultRow()); + } + return measurements; + } + +} From 760067e18e32718b3766511acf319ceca22a864b Mon Sep 17 00:00:00 2001 From: Gunnar Morling Date: Tue, 2 Jan 2024 21:20:40 +0100 Subject: [PATCH 18/18] Evaluating Aurelian Tutuianu's entry --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0ef0140ef..970b1419b 100644 --- a/README.md +++ b/README.md @@ -39,9 +39,10 @@ Submit your implementation by Jan 31 2024 and become part of the leaderboard! |---|-----------------|--------------------|---------------| | 1.| 00:23.366| [link](https://github.com/gunnarmorling/1brc/pull/5/)| [Roy van Rijn](https://github.com/royvanrijn)| | 2.| 00:38.510| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_bjhara.java)| [Hampus Ram](https://github.com/bjhara)| -| 3.| 02:08.315| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| -| 4.| 02:08.650| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java)| [Kuduwa Keshavram](https://github.com/kuduwa_keshavram)| -| 5.| 04:13.449| [link](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| [Gunnar Morling](https://github.com/gunnarmorling)| +| 3.| 00:50.547| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_padreati.java)| [Aurelian Tutuianu](https://github.com/padreati)| +| 4.| 02:08.315| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_itaske.java)| [itaske](https://github.com/itaske)| +| 5.| 02:08.650| [link](https://github.com/gunnarmorling/1brc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage_kuduwa_keshavram.java)| [Kuduwa Keshavram](https://github.com/kuduwa_keshavram)| +| 6.| 04:13.449| [link](https://github.com/gunnarmorling/onebrc/blob/main/src/main/java/dev/morling/onebrc/CalculateAverage.java) (baseline)| [Gunnar Morling](https://github.com/gunnarmorling)| See [below](#entering-the-challenge) for instructions how to enter the challenge with your own implementation.