Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Buffer interoperability with arrow-rs (jorgecarleitao#1429)
Browse files Browse the repository at this point in the history
tustvold committed Mar 17, 2023
1 parent 15f5c52 commit 94d22a1
Showing 5 changed files with 78 additions and 4 deletions.
9 changes: 7 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -6,7 +6,7 @@ description = "Unofficial implementation of Apache Arrow spec in safe Rust"
homepage = "https://github.com/jorgecarleitao/arrow2"
repository = "https://github.com/jorgecarleitao/arrow2"
authors = ["Jorge C. Leitao <jorgecarleitao@gmail.com>", "Apache Arrow <dev@arrow.apache.org>"]
keywords = [ "arrow", "analytics" ]
keywords = ["arrow", "analytics"]
edition = "2021"
exclude = ["testing/"]

@@ -100,6 +100,9 @@ odbc-api = { version = "0.36", optional = true }
# Faster hashing
ahash = "0.8"

# Support conversion to/from arrow-rs
arrow-buffer = { version = "35.0.0", optional = true }

[target.wasm32-unknown-unknown.dependencies]
getrandom = { version = "0.2", features = ["js"] }

@@ -131,6 +134,7 @@ rustdoc-args = ["--cfg", "docsrs"]
[features]
default = []
full = [
"arrow",
"io_odbc",
"io_csv",
"io_csv_async",
@@ -154,6 +158,7 @@ full = [
# parses timezones used in timestamp conversions
"chrono-tz",
]
arrow = ["arrow-buffer"]
io_odbc = ["odbc-api"]
io_csv = ["io_csv_read", "io_csv_write"]
io_csv_async = ["io_csv_read_async"]
@@ -195,7 +200,7 @@ io_avro_compression = [
]
io_avro_async = ["avro-schema/async"]

io_orc = [ "orc-format" ]
io_orc = ["orc-format"]

# serde+serde_json: its dependencies + error handling
# serde_derive: there is some derive around
4 changes: 3 additions & 1 deletion src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
@@ -415,7 +415,9 @@ impl IntoIterator for Bitmap {
// Conversion of an arrow2 `Bitmap` into an arrow-rs `NullBuffer`.
//
// NOTE(review): this hunk is shown without +/- markers. Judging by the
// "3 additions & 1 deletion" summary, the `new_unchecked(value.into(), ...)` line is
// the removed version and the `new_unchecked(buffer, ...)` line is its replacement.
impl From<Bitmap> for arrow_buffer::buffer::NullBuffer {
fn from(value: Bitmap) -> Self {
// The bitmap's count of unset bits is used as the null count of the result.
let null_count = value.unset_bits;
// Hand the underlying bytes to arrow-rs, then describe the bit range with the
// bitmap's own offset and length.
let buffer = crate::buffer::to_buffer(value.bytes);
let buffer = arrow_buffer::buffer::BooleanBuffer::new(buffer, value.offset, value.length);
// Safety: null count is accurate
unsafe { arrow_buffer::buffer::NullBuffer::new_unchecked(value.into(), null_count) }
unsafe { arrow_buffer::buffer::NullBuffer::new_unchecked(buffer, null_count) }
}
}
3 changes: 2 additions & 1 deletion src/buffer/mod.rs
Original file line number Diff line number Diff line change
@@ -28,7 +28,8 @@ pub(crate) fn to_buffer<T: crate::types::NativeType>(
#[cfg(feature = "arrow")]
pub(crate) fn to_bytes<T: crate::types::NativeType>(value: arrow_buffer::Buffer) -> Bytes<T> {
let ptr = value.as_ptr();
assert_eq!(ptr.align_offset(std::mem::align_of::<T>()), 0,);
let align = ptr.align_offset(std::mem::align_of::<T>());
assert_eq!(align, 0, "not aligned");
let len = value.len() / std::mem::size_of::<T>();

// Valid as `NativeType: Pod` and checked alignment above
26 changes: 26 additions & 0 deletions tests/it/bitmap/immutable.rs
Original file line number Diff line number Diff line change
@@ -39,3 +39,29 @@ fn debug() {

assert_eq!(format!("{b:?}"), "[0b111110__, 0b_______1]");
}

/// Round-trips an arrow-rs `NullBuffer` through `Bitmap`, both unsliced and sliced.
#[test]
#[cfg(feature = "arrow")]
fn from_arrow() {
    use arrow_buffer::buffer::{BooleanBuffer, NullBuffer};

    /// Converts `nulls` into a `Bitmap` and back, checking that the null count and the
    /// length agree and that the result equals the input.
    fn assert_round_trip(nulls: &NullBuffer) {
        let bitmap = Bitmap::from_null_buffer(nulls.clone());
        assert_eq!(nulls.null_count(), bitmap.unset_bits());
        assert_eq!(nulls.len(), bitmap.len());
        let restored = NullBuffer::from(bitmap);
        assert_eq!(nulls, &restored);
    }

    // Seven validity bits, three of which are unset.
    let packed =
        arrow_buffer::Buffer::from_iter(vec![true, true, true, false, false, false, true]);
    let full = NullBuffer::new(BooleanBuffer::new(packed, 0, 7));
    assert_eq!(full.null_count(), 3);
    assert_round_trip(&full);

    // Bits 1..4 of the above: two set bits followed by one unset bit.
    let sliced = full.slice(1, 3);
    assert_eq!(sliced.null_count(), 1);
    assert_eq!(sliced.len(), 3);
    assert_round_trip(&sliced);
}
40 changes: 40 additions & 0 deletions tests/it/buffer/immutable.rs
Original file line number Diff line number Diff line change
@@ -43,3 +43,43 @@ fn from_vec() {
assert_eq!(buffer.len(), 3);
assert_eq!(buffer.as_slice(), &[0, 1, 2]);
}

/// Round-trips arrow-rs buffers through `Buffer<i32>`, including byte-sliced buffers and
/// a buffer that was originally allocated as `i64`.
#[test]
#[cfg(feature = "arrow")]
fn from_arrow() {
    /// Converts `source` into a `Buffer<i32>`, checks its contents against `expected`,
    /// then converts back and checks the result equals `source`.
    fn assert_round_trip(source: &arrow_buffer::Buffer, expected: &[i32]) {
        let converted = Buffer::<i32>::from(source.clone());
        assert_eq!(converted.len(), expected.len());
        assert_eq!(converted.as_slice(), expected);
        let restored = arrow_buffer::Buffer::from(converted);
        assert_eq!(&restored, source);
    }

    let ints = arrow_buffer::Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]);
    assert_round_trip(&ints, &[1, 2, 3]);
    // `slice` takes a byte offset: skipping 4 bytes drops the first `i32`.
    assert_round_trip(&ints.slice(4), &[2, 3]);

    // An `i64` allocation read as `i32`: each value shows up as two words.
    let longs = arrow_buffer::Buffer::from_vec(vec![1_i64, 2_i64]);
    assert_round_trip(&longs, &[1, 0, 2, 0]);
    // A 4-byte offset is still `i32`-aligned, so this starts mid-way through the first `i64`.
    assert_round_trip(&longs.slice(4), &[0, 2, 0]);
}

/// Converting an arrow-rs buffer whose start is not aligned for `i32` must panic with
/// the "not aligned" message instead of producing a misaligned `Buffer<i32>`.
#[test]
#[cfg(feature = "arrow")]
#[should_panic(expected = "not aligned")]
fn from_arrow_misaligned() {
    // `slice` takes a byte offset, so skipping a single byte of an `i32` allocation
    // leaves the data pointer off the 4-byte boundary.
    let buffer = arrow_buffer::Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]).slice(1);
    // The buffer is not used again, so it is moved into the conversion rather than cloned.
    let _ = Buffer::<i32>::from(buffer);
}

0 comments on commit 94d22a1

Please sign in to comment.