Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(java): reduce metastring hashcode payload for small string(<=16 bytes) #1909

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ The text of each license is also included in licenses/LICENSE-[project].txt.
java/fury-core/src/main/java/org/apache/fury/collection/IdentityMap.java
java/fury-core/src/main/java/org/apache/fury/collection/IdentityObjectIntMap.java
java/fury-core/src/main/java/org/apache/fury/collection/LongMap.java
java/fury-core/src/main/java/org/apache/fury/collection/LongLongMap.java
java/fury-core/src/main/java/org/apache/fury/collection/ObjectIntMap.java
java/fury-core/src/main/java/org/apache/fury/type/Generics.java
java/fury-core/src/test/java/org/apache/fury/type/GenericsTest.java
Expand Down
29 changes: 20 additions & 9 deletions go/fury/type.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package fury

import (
"fmt"
"github.com/apache/fury/go/fury/meta"
"hash/fnv"
"reflect"
"regexp"
Expand Down Expand Up @@ -136,6 +137,7 @@ const (
NotSupportCrossLanguage = 0
useStringValue = 0
useStringId = 1
SMALL_STRING_THRESHOLD = 16
)

var (
Expand Down Expand Up @@ -551,14 +553,19 @@ func (r *typeResolver) writeMetaString(buffer *ByteBuffer, str string) error {
dynamicStringId := r.dynamicStringId
r.dynamicStringId += 1
r.dynamicStringToId[str] = dynamicStringId
buffer.WriteVarInt32(int32(len(str) << 1))
// TODO this hash should be unique, since we don't compare data equality for performance
h := fnv.New64a()
if _, err := h.Write([]byte(str)); err != nil {
return err
length := len(str)
buffer.WriteVarInt32(int32(length << 1))
if length <= SMALL_STRING_THRESHOLD {
buffer.WriteByte_(uint8(meta.UTF_8))
} else {
// TODO this hash should be unique, since we don't compare data equality for performance
h := fnv.New64a()
if _, err := h.Write([]byte(str)); err != nil {
return err
}
hash := int64(h.Sum64() & 0xffffffffffffff00)
buffer.WriteInt64(hash)
}
hash := int64(h.Sum64() & 0xffffffffffffff00)
buffer.WriteInt64(hash)
if len(str) > MaxInt16 {
return fmt.Errorf("too long string: %s", str)
}
Expand All @@ -573,8 +580,12 @@ func (r *typeResolver) readMetaString(buffer *ByteBuffer) (string, error) {
header := buffer.ReadVarInt32()
var length = int(header >> 1)
if header&0b1 == 0 {
// TODO support use computed hash
buffer.ReadInt64()
if length <= SMALL_STRING_THRESHOLD {
buffer.ReadByte_()
} else {
// TODO support use computed hash
buffer.ReadInt64()
}
str := string(buffer.ReadBinary(length))
dynamicStringId := r.dynamicStringId
r.dynamicStringId += 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
*/
@SuppressWarnings("unchecked")
public class FuryObjectMap<K, V> {
static final long MASK_NUMBER = 0x9E3779B97F4A7C15L;
static final Object dummy = new Object();

public int size;
Expand Down Expand Up @@ -135,7 +136,7 @@ public FuryObjectMap(int initialCapacity, float loadFactor) {
* {@code return item.hashCode() & mask;}
*/
protected int place(K item) {
return (int) (item.hashCode() * 0x9E3779B97F4A7C15L >>> shift);
return (int) (item.hashCode() * MASK_NUMBER >>> shift);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
/* Copyright (c) 2008-2023, Nathan Sweet
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided with the distribution.
* - Neither the name of Esoteric Software nor the names of its contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */

package org.apache.fury.collection;

import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;

import org.apache.fury.annotation.Internal;
import org.apache.fury.util.Preconditions;

/**
 * A fast linear-probing hash map whose key is a pair of long values {@code (long k1, long k2)}.
 * Lookups and insertions take the two longs directly, so callers never have to allocate a key
 * object of their own just to query the map.
 *
 * <p>Only insertion and lookup are provided; there is no removal operation.
 *
 * @param <V> type of the values stored in the map
 */
// The linear probed hash is derived from
// https://github.com/EsotericSoftware/kryo/blob/135df69526615bb3f6b34846e58ba3fec3b631c3/src/com/esotericsoftware/kryo/util/IntMap.java.
@SuppressWarnings("unchecked")
@Internal
public final class LongLongMap<V> {
  /** Immutable holder for the two longs that make up one key stored in the table. */
  private static final class LongLongKey {
    private final long k1;
    private final long k2;

    public LongLongKey(long k1, long k2) {
      this.k1 = k1;
      this.k2 = k2;
    }

    @Override
    public String toString() {
      return "LongLongKey{" + "k1=" + k1 + ", k2=" + k2 + '}';
    }
  }

  /** Number of entries currently stored. */
  public int size;

  /** Key slots; a {@code null} slot is empty. Always the same length as {@link #valueTable}. */
  LongLongKey[] keyTable;

  /** Value slots, index-aligned with {@link #keyTable}. */
  V[] valueTable;

  private final float loadFactor;

  /** Entry count at which the table is doubled. */
  private int threshold;

  /** Right shift applied to the mixed 64-bit hash so that only its top bits form the index. */
  private int shift;

  /** {@code tableLength - 1}; used to wrap the probe index around the table. */
  private int mask;

  /**
   * Creates a new map with the specified initial capacity and load factor. The backing table is
   * sized through {@code FuryObjectMap.tableSize(initialCapacity, loadFactor)}.
   *
   * @param initialCapacity If not a power of two, it is increased to the next nearest power of two.
   * @param loadFactor fraction of the table that may be filled before it grows; must be greater
   *     than 0 and at most 1. Zero is rejected because it would produce a threshold of 0 and force
   *     a table doubling on every insertion.
   */
  public LongLongMap(int initialCapacity, float loadFactor) {
    Preconditions.checkArgument(
        0 < loadFactor && loadFactor <= 1, "loadFactor %s must be > 0 and <= 1", loadFactor);
    this.loadFactor = loadFactor;
    int tableSize = FuryObjectMap.tableSize(initialCapacity, loadFactor);
    threshold = (int) (tableSize * loadFactor);
    mask = tableSize - 1;
    shift = Long.numberOfLeadingZeros(mask);
    keyTable = new LongLongKey[tableSize];
    valueTable = (V[]) new Object[tableSize];
  }

  /**
   * Returns the preferred table index for the key: the two longs are combined, multiplied by
   * {@code MASK_NUMBER}, and the upper bits of the product are kept.
   */
  private int place(long k1, long k2) {
    return (int) ((k1 * 31 + k2) * MASK_NUMBER >>> shift);
  }

  /**
   * Returns the index of the key if already present, else {@code -(index + 1)} for the next empty
   * index found by linear probing.
   */
  private int locateKey(long k1, long k2) {
    LongLongKey[] keyTable = this.keyTable;
    int mask = this.mask;
    for (int i = place(k1, k2); ; i = i + 1 & mask) {
      LongLongKey other = keyTable[i];
      if (other == null) {
        return -(i + 1); // Empty space is available.
      }
      if (other.k1 == k1 && other.k2 == k2) {
        return i; // Same key was found.
      }
    }
  }

  /**
   * Associates {@code value} with the key {@code (k1, k2)}.
   *
   * @return the value previously stored for the key, or {@code null} if the key was absent
   */
  public V put(long k1, long k2, V value) {
    int i = locateKey(k1, k2);
    if (i >= 0) { // Existing key was found.
      V[] valueTable = this.valueTable;
      V oldValue = valueTable[i];
      valueTable[i] = value;
      return oldValue;
    }
    i = -(i + 1); // Empty space was found.
    keyTable[i] = new LongLongKey(k1, k2);
    valueTable[i] = value;
    if (++size >= threshold) {
      resize(keyTable.length << 1);
    }
    return null;
  }

  /**
   * Looks up the value stored for the key {@code (k1, k2)}.
   *
   * @return the stored value, or {@code null} if the key is absent
   */
  public V get(long k1, long k2) {
    int i = locateKey(k1, k2);
    return i >= 0 ? valueTable[i] : null;
  }

  /** Rebuilds the table with {@code newSize} slots and re-inserts every existing entry. */
  private void resize(int newSize) {
    int oldCapacity = keyTable.length;
    threshold = (int) (newSize * loadFactor);
    mask = newSize - 1;
    shift = Long.numberOfLeadingZeros(mask);
    LongLongKey[] oldKeyTable = keyTable;
    V[] oldValueTable = valueTable;
    keyTable = new LongLongKey[newSize];
    valueTable = (V[]) new Object[newSize];
    if (size > 0) {
      for (int i = 0; i < oldCapacity; i++) {
        LongLongKey key = oldKeyTable[i];
        if (key != null) {
          // Keys in the old table are unique, so only an empty slot has to be found.
          for (int j = place(key.k1, key.k2); ; j = (j + 1) & mask) {
            if (keyTable[j] == null) {
              // The key holder is immutable, so it is moved instead of being re-allocated.
              keyTable[j] = key;
              valueTable[j] = oldValueTable[i];
              break;
            }
          }
        }
      }
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.apache.fury.collection;

import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;

import java.util.Arrays;

// Derived from
Expand Down Expand Up @@ -141,7 +143,7 @@ public LongMap(LongMap<? extends V> map) {
* {@code return item.hashCode() & mask;}
*/
protected int place(long item) {
return (int) (item * 0x9E3779B97F4A7C15L >>> shift);
return (int) (item * MASK_NUMBER >>> shift);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@

package org.apache.fury.collection;

import static org.apache.fury.collection.FuryObjectMap.MASK_NUMBER;

import java.util.HashMap;
import java.util.Map;
import java.util.function.BiConsumer;
Expand Down Expand Up @@ -59,7 +61,7 @@ public ObjectIntMap(int initialCapacity, float loadFactor) {
}

protected int place(K item) {
return (int) (item.hashCode() * 0x9E3779B97F4A7C15L >>> shift);
return (int) (item.hashCode() * MASK_NUMBER >>> shift);
}

int locateKey(K key) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ private static Object readFromChannel(
Fury fury, ReadableByteChannel channel, Function<MemoryBuffer, Object> action) {
try {
MemoryBuffer buf = fury.getBuffer();
buf.readerIndex(0);
ByteBuffer byteBuffer = ByteBuffer.allocate(4);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
readByteBuffer(channel, byteBuffer, 4);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ public static long getInt64(Object o, long pos) {
return Platform.IS_LITTLE_ENDIAN ? v : Long.reverseBytes(v);
}

/**
 * Stores {@code value} into {@code o} at byte position {@code index}. The value is byte-swapped
 * first when {@code Platform.IS_LITTLE_ENDIAN} is false, so the stored layout is presumably
 * little-endian regardless of the platform (assumes {@code Platform.putLong} writes in native
 * order; confirm against {@code Platform}). No bounds check is performed here.
 */
public static void putInt64(byte[] o, int index, long value) {
  final long toStore = Platform.IS_LITTLE_ENDIAN ? value : Long.reverseBytes(value);
  Platform.putLong(o, Platform.BYTE_ARRAY_OFFSET + index, toStore);
}

public static void putFloat32(Object o, long pos, float value) {
int v = Float.floatToRawIntBits(value);
if (!Platform.IS_LITTLE_ENDIAN) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2174,6 +2174,42 @@ public void readBytes(byte[] dst) {
readBytes(dst, 0, dst.length);
}

/**
 * Read {@code len} bytes into a long using little-endian order and advance the reader index by
 * {@code len}.
 *
 * <p>When at least 8 bytes remain, a full 8-byte word is loaded in one access and the bytes
 * beyond {@code len} are masked off; otherwise the bytes are assembled one at a time by {@link
 * #slowReadBytesAsInt64(int, int)}.
 *
 * @param len number of bytes to consume; the mask computation assumes a value in {@code [0, 8]}
 * @return the consumed bytes as a little-endian number, with the unused high bytes zeroed
 */
public long readBytesAsInt64(int len) {
  int readerIdx = readerIndex;
  // use subtract to avoid overflow
  int remaining = size - readerIdx;
  if (remaining >= 8) {
    readerIndex = readerIdx + len;
    if (len == 0) {
      // A long shift distance is reduced modulo 64, so `>>> 64` would leave the mask as all
      // ones and leak 8 unread bytes; zero bytes must yield 0, as the slow path does.
      return 0;
    }
    long v = UNSAFE.getLong(heapMemory, address + readerIdx);
    if (!LITTLE_ENDIAN) {
      // Normalize the byte order before masking: masking the native big-endian word first
      // would keep the last bytes of the word instead of the first `len` bytes.
      v = Long.reverseBytes(v);
    }
    return v & (0xffffffffffffffffL >>> ((8 - len) * 8));
  }
  return slowReadBytesAsInt64(remaining, len);
}

/**
 * Byte-at-a-time fallback for {@code readBytesAsInt64}: asks the stream reader for more data when
 * fewer than {@code len} bytes remain, then assembles {@code len} bytes into a long with the
 * first byte in the lowest position.
 *
 * @param remaining number of readable bytes left before any refill
 * @param len number of bytes to consume
 * @return the assembled little-endian value
 */
private long slowReadBytesAsInt64(int remaining, int len) {
  int missing = len - remaining;
  if (missing > 0) {
    streamReader.fillBuffer(missing);
  }
  // The index and the backing array are read only after the refill.
  final int begin = readerIndex;
  readerIndex = begin + len;
  final byte[] heap = this.heapMemory;
  long value = 0;
  int bitOffset = 0;
  if (heap == null) {
    final long base = address + begin;
    for (int k = 0; k < len; k++, bitOffset += 8) {
      value |= ((long) UNSAFE.getByte(null, base + k) & 0xff) << bitOffset;
    }
    return value;
  }
  final int base = heapOffset + begin;
  for (int k = 0; k < len; k++, bitOffset += 8) {
    value |= (((long) heap[base + k]) & 0xff) << bitOffset;
  }
  return value;
}

public int read(ByteBuffer dst) {
int readerIdx = readerIndex;
int len = dst.remaining();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1619,6 +1619,8 @@ public void writeClassInternal(MemoryBuffer buffer, ClassInfo classInfo) {
if (classInfo.classId != NO_CLASS_ID) {
buffer.writeVarUint32(classInfo.classId << 1);
} else {
// Set the lowest bit of the next byte so that deserialization knows in advance
// whether it needs to read the class by name.
metaStringResolver.writeMetaStringBytesWithFlag(buffer, classInfo.packageNameBytes);
metaStringResolver.writeMetaStringBytes(buffer, classInfo.classNameBytes);
}
Expand All @@ -1634,6 +1636,8 @@ public Class<?> readClassInternal(MemoryBuffer buffer) {
int header = buffer.readVarUint32Small14();
final ClassInfo classInfo;
if ((header & 0b1) != 0) {
// The lowest bit of the header is set, which tells deserialization in advance
// that the class needs to be read by name.
MetaStringBytes packageBytes = metaStringResolver.readMetaStringBytesWithFlag(buffer, header);
MetaStringBytes simpleClassNameBytes = metaStringResolver.readMetaStringBytes(buffer);
classInfo = loadBytesToClassInfo(packageBytes, simpleClassNameBytes);
Expand Down
Loading
Loading