Skip to content

Commit

Permalink
deduplicate variadic buffers in MutableArrayData::extend for ByteView…
Browse files Browse the repository at this point in the history
… arrays
  • Loading branch information
onursatici committed Nov 27, 2024
1 parent 93e6749 commit 6472dd8
Showing 1 changed file with 29 additions and 23 deletions.
52 changes: 29 additions & 23 deletions arrow-data/src/transform/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use arrow_buffer::{bit_util, i256, ArrowNativeType, Buffer, MutableBuffer};
use arrow_schema::{ArrowError, DataType, IntervalUnit, UnionMode};
use half::f16;
use num::Integer;
use std::collections::HashMap;
use std::mem;

mod boolean;
Expand Down Expand Up @@ -214,7 +215,7 @@ fn build_extend_dictionary(array: &ArrayData, offset: usize, max: usize) -> Opti
}

/// Builds an extend that adds `buffer_offset` to any buffer indices encountered
fn build_extend_view(array: &ArrayData, buffer_offset: u32) -> Extend {
fn build_extend_view(array: &ArrayData, buffer_lookup: Vec<usize>) -> Extend {
let views = array.buffer::<u128>(0);
Box::new(
move |mutable: &mut _MutableArrayData, _, start: usize, len: usize| {
Expand All @@ -226,7 +227,7 @@ fn build_extend_view(array: &ArrayData, buffer_offset: u32) -> Extend {
return *v; // Stored inline
}
let mut view = ByteView::from(*v);
view.buffer_index += buffer_offset;
view.buffer_index = buffer_lookup[view.buffer_index as usize] as u32;
view.into()
}))
},
Expand Down Expand Up @@ -627,13 +628,19 @@ impl<'a> MutableArrayData<'a> {
_ => (None, false),
};

let variadic_data_buffers = match &data_type {
DataType::BinaryView | DataType::Utf8View => arrays
.iter()
.flat_map(|x| x.buffers().iter().skip(1))
.map(Buffer::clone)
.collect(),
_ => vec![],
let (variadic_data_buffers, buffer_to_idx) = match &data_type {
DataType::BinaryView | DataType::Utf8View => {
let mut buffer_to_idx = HashMap::new();
let mut variadic_buffers = Vec::new();
for buffer in arrays.iter().flat_map(|x| x.buffers().iter().skip(1)) {
buffer_to_idx.entry(buffer.as_ptr()).or_insert_with(|| {
variadic_buffers.push(buffer.clone());
variadic_buffers.len() - 1
});
}
(variadic_buffers, buffer_to_idx)
}
_ => (vec![], HashMap::new()),
};

let extend_nulls = build_extend_nulls(data_type);
Expand Down Expand Up @@ -668,20 +675,19 @@ impl<'a> MutableArrayData<'a> {

extend_values.expect("MutableArrayData::new is infallible")
}
DataType::BinaryView | DataType::Utf8View => {
let mut next_offset = 0u32;
arrays
.iter()
.map(|arr| {
let num_data_buffers = (arr.buffers().len() - 1) as u32;
let offset = next_offset;
next_offset = next_offset
.checked_add(num_data_buffers)
.expect("view buffer index overflow");
build_extend_view(arr, offset)
})
.collect()
}
DataType::BinaryView | DataType::Utf8View => arrays
.iter()
.map(|arr| {
let arr_to_res_buffer_idx: Vec<_> = arr
.buffers()
.iter()
.skip(1)
.map(|buf| *buffer_to_idx.get(&buf.as_ptr()).unwrap())
.collect();

build_extend_view(arr, arr_to_res_buffer_idx)
})
.collect(),
_ => arrays.iter().map(|array| build_extend(array)).collect(),
};

Expand Down

0 comments on commit 6472dd8

Please sign in to comment.