Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(java/python): new xlang type system spec implementation #1690

Merged
merged 79 commits into from
Dec 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
79 commits
Select commit Hold shift + click to select a range
f1efacd
remove xlang-unsupported object serialization
chaokunyang Jun 3, 2024
ee381af
new xlang spec in java
chaokunyang Jun 16, 2024
07e24c1
fix compile error
chaokunyang Jun 16, 2024
4a59adc
move arrow type id to format module
chaokunyang Jul 13, 2024
42a6f58
refine type register API
chaokunyang Jul 13, 2024
a3d4d9d
refine type register API
chaokunyang Jul 13, 2024
bf98a76
add comments to MetaStringResolver
chaokunyang Jul 14, 2024
8dba32e
refactor type system
chaokunyang Jul 14, 2024
3eba2d9
refactor type system
chaokunyang Jul 14, 2024
85eabd7
Merge remote-tracking branch 'ant/main' into new_xlang_sepc_impl
chaokunyang Oct 27, 2024
04dd07f
Merge remote-tracking branch 'ant/main' into new_xlang_sepc_impl
chaokunyang Nov 2, 2024
ddb5766
fix fury
chaokunyang Nov 3, 2024
22d4924
Merge remote-tracking branch 'ant/main' into new_xlang_sepc_impl
chaokunyang Nov 23, 2024
3871b19
rewrite xlang type dispatch
chaokunyang Nov 23, 2024
efdcde4
fix compile error
chaokunyang Nov 23, 2024
a4b4e26
java xlang serialization type dispatch
chaokunyang Nov 23, 2024
b3db2c7
add header
chaokunyang Nov 23, 2024
c9d9ff0
add license header for exception
chaokunyang Nov 23, 2024
79159ce
add date support
chaokunyang Nov 24, 2024
bc15ed7
refine type system
chaokunyang Nov 24, 2024
3ef5e35
add type name hash check
chaokunyang Nov 24, 2024
4499c2b
new type def
chaokunyang Dec 4, 2024
3c0f316
new type def
chaokunyang Dec 4, 2024
d666140
part1: new serialization impl in py
chaokunyang Dec 4, 2024
aab37a7
part2: new serialization impl in python
chaokunyang Dec 8, 2024
84c5be7
fix read/write classinfo
chaokunyang Dec 8, 2024
32fdc0c
refine ClassInfo doc
chaokunyang Dec 8, 2024
34f4b52
remove native opaque object xlang support
chaokunyang Dec 8, 2024
5aaaf52
class registration part3
chaokunyang Dec 14, 2024
15b59ca
Merge remote-tracking branch 'ant/main' into new_xlang_sepc_impl
chaokunyang Dec 14, 2024
041c82c
class registration part4
chaokunyang Dec 14, 2024
1ca4adf
adjust code structure
chaokunyang Dec 14, 2024
5890d2c
Merge remote-tracking branch 'ant/main' into new_xlang_sepc_impl
chaokunyang Dec 16, 2024
d2fb76e
fix compile and import
chaokunyang Dec 17, 2024
473c796
fix compile and import
chaokunyang Dec 17, 2024
1a5f945
fix test
chaokunyang Dec 18, 2024
c7e2a55
fix pyfury impl
chaokunyang Dec 19, 2024
5198bda
fix pickle cache serializer
chaokunyang Dec 19, 2024
7951245
fix pyfury xlang serialization
chaokunyang Dec 20, 2024
0715b47
fix meta string parsing
chaokunyang Dec 20, 2024
9890038
fix internal type registration
chaokunyang Dec 20, 2024
3a05739
fix type info write/parse
chaokunyang Dec 20, 2024
2c68aa0
fix buffer object
chaokunyang Dec 20, 2024
ccb3639
c++ GetBytesAsInt64 and cython wrapper
chaokunyang Dec 20, 2024
c3fb155
read_bytes_as_int64 and metatring test
chaokunyang Dec 20, 2024
c660532
fix pyfury tests
chaokunyang Dec 20, 2024
79e7bf6
lint java code
chaokunyang Dec 20, 2024
e815aa8
lint python code
chaokunyang Dec 20, 2024
3073e3c
lint cython code
chaokunyang Dec 20, 2024
18a08f8
fix compute hash
chaokunyang Dec 20, 2024
1eb20da
fix visit_customized
chaokunyang Dec 22, 2024
35ddf2f
add missing license header
chaokunyang Dec 22, 2024
874062a
skip go xlang tests
chaokunyang Dec 22, 2024
40f1197
remove OpaqueObject support
chaokunyang Dec 22, 2024
dd1272d
deserialize according passed type info
chaokunyang Dec 23, 2024
41ad3a2
create XtypeResolver for xlang only
chaokunyang Dec 23, 2024
2f5d5a4
rename NS to NAMED
chaokunyang Dec 23, 2024
09b88b6
support varint/varuint in pyfury
chaokunyang Dec 24, 2024
d304d88
support varint/varuint in xlang java
chaokunyang Dec 24, 2024
2d1e402
fix named type read
chaokunyang Dec 24, 2024
47774d6
fix map polymorphic
chaokunyang Dec 25, 2024
ff0bf3c
fix struct hash
chaokunyang Dec 25, 2024
9d5a0c9
fix struct object array field serialization
chaokunyang Dec 25, 2024
704259b
Merge remote-tracking branch 'ant/main' into new_xlang_sepc_impl
chaokunyang Dec 25, 2024
4f25a58
fix metastring
chaokunyang Dec 26, 2024
f4242cd
fix enum type id
chaokunyang Dec 26, 2024
43eee77
support timestamp polymorphic
chaokunyang Dec 26, 2024
01bf5eb
fix xlang tests
chaokunyang Dec 26, 2024
2e150a4
fix xlang tests
chaokunyang Dec 26, 2024
5dc72df
lint code and fix tests
chaokunyang Dec 26, 2024
a9a3cff
fix array polymorphic
chaokunyang Dec 26, 2024
e4ffe1d
fix array polymorphic
chaokunyang Dec 26, 2024
e49b2ce
fix arrow register
chaokunyang Dec 26, 2024
a6d9959
refine xlang type system spec
chaokunyang Dec 26, 2024
041878b
lint code
chaokunyang Dec 26, 2024
00f3aaf
lint code
chaokunyang Dec 26, 2024
c9e22d9
add javadoc to java xlang types
chaokunyang Dec 27, 2024
0140001
add doc to type system
chaokunyang Dec 27, 2024
02001ad
add missing named type
chaokunyang Dec 27, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ pyx_library(
),
deps = [
"//cpp/fury/util:fury_util",
"//cpp/fury/type:fury_type",
"@com_google_absl//absl/container:flat_hash_map",
],
)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class SomeClass:
f3: Dict[str, str]

fury = pyfury.Fury(ref_tracking=True)
fury.register_class(SomeClass, type_tag="example.SomeClass")
fury.register_type(SomeClass, typename="example.SomeClass")
obj = SomeClass()
obj.f2 = {"k1": "v1", "k2": "v2"}
obj.f1, obj.f3 = obj, obj.f2
Expand Down
15 changes: 15 additions & 0 deletions cpp/fury/type/BUILD
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test")

cc_library(
name = "fury_type",
srcs = glob(["*.cc"], exclude=["*test.cc"]),
hdrs = glob(["*.h"]),
copts = ["-mavx2"], # Enable AVX2 support
linkopts = ["-mavx2"], # Ensure linker also knows about AVX2
strip_include_prefix = "/cpp",
alwayslink=True,
linkstatic=True,
deps = [
],
visibility = ["//visibility:public"],
)
153 changes: 153 additions & 0 deletions cpp/fury/type/type.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

#include <cstdint> // For fixed-width integer types

namespace fury {
enum class TypeId : int32_t {
// a boolean value (true or false).
BOOL = 1,
// a 8-bit signed integer.
INT8 = 2,
// a 16-bit signed integer.
INT16 = 3,
// a 32-bit signed integer.
INT32 = 4,
// a 32-bit signed integer which use fury var_int32 encoding.
VAR_INT32 = 5,
// a 64-bit signed integer.
INT64 = 6,
// a 64-bit signed integer which use fury PVL encoding.
VAR_INT64 = 7,
// a 64-bit signed integer which use fury SLI encoding.
SLI_INT64 = 8,
// a 16-bit floating point number.
FLOAT16 = 9,
// a 32-bit floating point number.
FLOAT32 = 10,
// a 64-bit floating point number including NaN and Infinity.
FLOAT64 = 11,
// a text string encoded using Latin1/UTF16/UTF-8 encoding.
STRING = 12,
// a data type consisting of a set of named values. Rust enum with
// non-predefined field values are not supported as an enum
ENUM = 13,
// an enum whose value will be serialized as the registered name.
NAMED_ENUM = 14,
// a morphic(final) type serialized by Fury Struct serializer. i.e. it doesn't
// have subclasses. Suppose we're
// deserializing `List<SomeClass>`, we can save dynamic serializer dispatch
// since `SomeClass` is morphic(final).
STRUCT = 15,
// a type which is not morphic(not final). i.e. it has subclasses. Suppose
// we're deserializing
// `List<SomeClass>`, we must dispatch serializer dynamically since
// `SomeClass` is polymorphic(non-final).
POLYMORPHIC_STRUCT = 16,
// a morphic(final) type serialized by Fury compatible Struct serializer.
COMPATIBLE_STRUCT = 17,
// a non-morphic(non-final) type serialized by Fury compatible Struct
// serializer.
POLYMORPHIC_COMPATIBLE_STRUCT = 18,
// a `struct` whose type mapping will be encoded as a name.
NAMED_STRUCT = 19,
// a `polymorphic_struct` whose type mapping will be encoded as a name.
NAMED_POLYMORPHIC_STRUCT = 20,
// a `compatible_struct` whose type mapping will be encoded as a name.
NAMED_COMPATIBLE_STRUCT = 21,
// a `polymorphic_compatible_struct` whose type mapping will be encoded as a
// name.
NAMED_POLYMORPHIC_COMPATIBLE_STRUCT = 22,
// a type which will be serialized by a customized serializer.
EXT = 23,
// an `ext` type which is not morphic(not final).
POLYMORPHIC_EXT = 24,
// an `ext` type whose type mapping will be encoded as a name.
NAMED_EXT = 25,
// an `polymorphic_ext` type whose type mapping will be encoded as a name.
NAMED_POLYMORPHIC_EXT = 26,
// a sequence of objects.
LIST = 27,
// an unordered set of unique elements.
SET = 28,
// a map of key-value pairs. Mutable types such as
// `list/map/set/array/tensor/arrow` are not allowed as key of map.
MAP = 29,
// an absolute length of time, independent of any calendar/timezone, as a
// count of nanoseconds.
DURATION = 30,
// a point in time, independent of any calendar/timezone, as a count of
// nanoseconds. The count is relative
// to an epoch at UTC midnight on January 1, 1970.
TIMESTAMP = 31,
// a naive date without timezone. The count is days relative to an epoch at
// UTC midnight on Jan 1, 1970.
LOCAL_DATE = 32,
// exact decimal value represented as an integer value in two's complement.
DECIMAL = 33,
// an variable-length array of bytes.
BINARY = 34,
// a multidimensional array which every sub-array can have different sizes but
// all have same type.
// only allow numeric components. Other arrays will be taken as List. The
// implementation should support the
// interoperability between array and list.
ARRAY = 35,
// one dimensional bool array.
BOOL_ARRAY = 36,
// one dimensional int16 array.
INT8_ARRAY = 37,
// one dimensional int16 array.
INT16_ARRAY = 38,
// one dimensional int32 array.
INT32_ARRAY = 39,
// one dimensional int64 array.
INT64_ARRAY = 40,
// one dimensional half_float_16 array.
FLOAT16_ARRAY = 41,
// one dimensional float32 array.
FLOAT32_ARRAY = 42,
// one dimensional float64 array.
FLOAT64_ARRAY = 43,
// an arrow [record
// batch](https://arrow.apache.org/docs/cpp/tables.html#record-batches)
// object.
ARROW_RECORD_BATCH = 44,
// an arrow [table](https://arrow.apache.org/docs/cpp/tables.html#tables)
// object.
ARROW_TABLE = 45,
BOUND = 64
};

inline bool IsNamespacedType(int32_t type_id) {
switch (static_cast<TypeId>(type_id)) {
case TypeId::NAMED_ENUM:
case TypeId::NAMED_STRUCT:
case TypeId::NAMED_POLYMORPHIC_STRUCT:
case TypeId::NAMED_COMPATIBLE_STRUCT:
case TypeId::NAMED_POLYMORPHIC_COMPATIBLE_STRUCT:
case TypeId::NAMED_EXT:
case TypeId::NAMED_POLYMORPHIC_EXT:
return true;
default:
return false;
}
}

} // namespace fury
24 changes: 24 additions & 0 deletions cpp/fury/util/buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "fury/util/bit_util.h"
#include "fury/util/logging.h"
#include "fury/util/status.h"

namespace fury {

Expand Down Expand Up @@ -133,6 +134,29 @@ class Buffer {

inline double GetDouble(uint32_t offset) { return Get<double>(offset); }

inline Status GetBytesAsInt64(uint32_t offset, uint32_t length,
int64_t *target) {
if (length == 0) {
*target = 0;
return Status::OK();
}
if (size_ - (offset + 8) > 0) {
uint64_t mask = 0xffffffffffffffff;
uint64_t x = (mask >> (8 - length) * 8);
*target = GetInt64(offset) & x;
} else {
if (size_ - (offset + length) < 0) {
return Status::OutOfBound("buffer out of bound");
}
int64_t result = 0;
for (size_t i = 0; i < length; i++) {
result = result | ((int64_t)(data_[offset + i])) << (i * 8);
}
*target = result;
}
return Status::OK();
}

inline uint32_t PutVarUint32(uint32_t offset, int32_t value) {
if (value >> 7 == 0) {
data_[offset] = (int8_t)value;
Expand Down
10 changes: 10 additions & 0 deletions cpp/fury/util/buffer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ TEST(Buffer, TestVarUint) {
}
}

TEST(Buffer, TestGetBytesAsInt64) {
std::shared_ptr<Buffer> buffer;
AllocateBuffer(64, &buffer);
buffer->UnsafePut<int32_t>(0, 100);
int64_t result = -1;
EXPECT_TRUE(buffer->GetBytesAsInt64(0, 0, &result).ok());
EXPECT_EQ(result, 0);
EXPECT_TRUE(buffer->GetBytesAsInt64(0, 1, &result).ok());
EXPECT_EQ(result, 100);
}
} // namespace fury

int main(int argc, char **argv) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/fury/util/logging.cc
Original file line number Diff line number Diff line change
Expand Up @@ -111,4 +111,4 @@ bool FuryLog::IsLevelEnabled(FuryLogLevel log_level) {
return log_level >= fury_severity_threshold;
}

} // namespace fury
} // namespace fury
15 changes: 10 additions & 5 deletions cpp/fury/util/status.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,12 @@ namespace fury {
enum class StatusCode : char {
OK = 0,
OutOfMemory = 1,
KeyError = 2,
TypeError = 3,
Invalid = 4,
IOError = 5,
UnknownError = 6,
OutOfBound = 2,
KeyError = 3,
TypeError = 4,
Invalid = 5,
IOError = 6,
UnknownError = 7,
};

class Status {
Expand Down Expand Up @@ -123,6 +124,10 @@ class Status {
return Status(StatusCode::OutOfMemory, msg);
}

static Status OutOfBound(const std::string &msg) {
return Status(StatusCode::OutOfMemory, msg);
}

static Status KeyError(const std::string &msg) {
return Status(StatusCode::KeyError, msg);
}
Expand Down
6 changes: 3 additions & 3 deletions docs/guide/xlang_serialization_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -225,8 +225,8 @@ class SomeClass2:

if __name__ == "__main__":
f = pyfury.Fury()
f.register_class(SomeClass1, type_tag="example.SomeClass1")
f.register_class(SomeClass2, type_tag="example.SomeClass2")
f.register_type(SomeClass1, typename="example.SomeClass1")
f.register_type(SomeClass2, typename="example.SomeClass2")
obj1 = SomeClass1(f1=True, f2={-1: 2})
obj = SomeClass2(
f1=obj1,
Expand Down Expand Up @@ -444,7 +444,7 @@ class SomeClass:
f3: Dict[str, str]

fury = pyfury.Fury(ref_tracking=True)
fury.register_class(SomeClass, type_tag="example.SomeClass")
fury.register_type(SomeClass, typename="example.SomeClass")
obj = SomeClass()
obj.f2 = {"k1": "v1", "k2": "v2"}
obj.f1, obj.f3 = obj, obj.f2
Expand Down
Loading
Loading