Skip to content
This repository has been archived by the owner on Feb 18, 2024. It is now read-only.

Added try_new for mutable data types #1589

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions src/array/dictionary/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use std::hash::Hash;
use std::sync::Arc;

use crate::array::indexable::{AsIndexed, Indexable};
use crate::array::specification::{check_indexes, check_indexes_unchecked};
use crate::{
array::{primitive::MutablePrimitiveArray, Array, MutableArray, TryExtend, TryPush},
bitmap::MutableBitmap,
Expand Down Expand Up @@ -63,6 +64,32 @@ impl<K: DictionaryKey, M: MutableArray + Default> Default for MutableDictionaryA
}

impl<K: DictionaryKey, M: MutableArray> MutableDictionaryArray<K, M> {
/// Creates a [`MutableDictionaryArray`] from a given keys array and values array.
///
/// Unless every key slot is null, the keys are bounds-checked against `values.len()`;
/// `values` is then turned into a `ValueMap` via `ValueMap::from_values`.
/// # Errors
/// Errors if the keys are out of bounds of the values array, or if `ValueMap::from_values`
/// rejects `values`.
/// NOTE(review): the previous wording said "keys array have duplicates", but the only check
/// applied to `keys` here is the bounds check; presumably it is duplicate *values* that are
/// rejected by `ValueMap::from_values` — confirm against its implementation.
/// NOTE(review): `data_type` is stored as given; nothing in this function validates it
/// against the keys or the values.
pub fn try_new(data_type: DataType, keys: MutablePrimitiveArray<K>, values: M) -> Result<Self>
where
M: Indexable,
M::Type: Eq + Hash,
{
// The bounds check is skipped only when every key slot is null.
if keys.null_count() != keys.len() {
if K::always_fits_usize() {
// SAFETY: `K::always_fits_usize()` returned true, i.e. conversion of a key to
// `usize` always succeeds, which is what the unchecked variant relies on.
unsafe { check_indexes_unchecked(keys.values(), values.len()) }?;
} else {
check_indexes(keys.values(), values.len())?;
}
}

let map = ValueMap::<K, M>::from_values(values)?;
Ok(Self {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm... shouldn't data_type be validated here?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, I think what you need to check is that ValueMap's resulting data type is the same as data type being passed in?

data_type,
map,
keys,
})
}

/// Creates an empty [`MutableDictionaryArray`] from a given empty values array.
/// # Errors
/// Errors if the array is non-empty.
Expand Down
51 changes: 51 additions & 0 deletions src/array/fixed_size_list/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,57 @@ impl<M: MutableArray> From<MutableFixedSizeListArray<M>> for FixedSizeListArray
}

impl<M: MutableArray> MutableFixedSizeListArray<M> {
/// Creates a new [`MutableFixedSizeListArray`].
///
/// # Errors
/// This function returns an error iff:
/// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeList`]
/// * The `data_type`'s inner field's data type is not equal to `values.data_type`.
/// * The length of `values` is not a multiple of `size` in `data_type`
/// * the validity's length is not equal to `values.len() / size`.
pub fn try_new(
data_type: DataType,
values: M,
validity: Option<MutableBitmap>,
) -> Result<Self> {
let (child, size) = FixedSizeListArray::try_child_and_size(&data_type)?;

let child_data_type = &child.data_type;
let values_data_type = values.data_type();
if child_data_type != values_data_type {
return Err(Error::oos(
format!(
"MutableFixedSizeListArray's child's DataType must match. However, the expected DataType is {child_data_type:?} while it got {values_data_type:?}.",
)
));
}

if values.len() % size != 0 {
return Err(Error::oos(format!(
"values (of len {}) must be a multiple of size ({}) in MutableFixedSizeListArray.",
values.len(),
size
)));
}
let len = values.len() / size;

if validity
.as_ref()
.map_or(false, |validity| validity.len() != len)
{
return Err(Error::oos(
"validity mask length must be equal to the number of values divided by size",
));
}

Ok(Self {
data_type,
size,
values,
validity,
})
}

/// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size.
pub fn new(values: M, size: usize) -> Self {
let data_type = FixedSizeListArray::default_datatype(values.data_type().clone(), size);
Expand Down
44 changes: 43 additions & 1 deletion src/array/list/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use crate::{
trusted_len::TrustedLen,
};

use super::ListArray;
use super::{try_check_offsets_bounds, ListArray};

/// The mutable version of [`ListArray`].
#[derive(Debug, Clone)]
Expand All @@ -24,6 +24,48 @@ pub struct MutableListArray<O: Offset, M: MutableArray> {
}

impl<O: Offset, M: MutableArray + Default> MutableListArray<O, M> {
/// Creates a new [`MutableListArray`].
///
/// # Errors
/// This function returns an error iff:
/// * The last offset is not equal to the values' length.
/// * the validity's length is not equal to `offsets.len()`.
/// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`].
/// * The `data_type`'s inner field's data type is not equal to `values.data_type`.
/// # Implementation
/// This function is `O(1)`
pub fn try_new(
data_type: DataType,
offsets: Offsets<O>,
values: M,
validity: Option<MutableBitmap>,
) -> Result<Self> {
try_check_offsets_bounds(&offsets, values.len())?;

if validity
.as_ref()
.map_or(false, |validity| validity.len() != offsets.len_proxy())
{
return Err(Error::oos(
"validity mask length must match the number of values",
));
}

let child_data_type = ListArray::<O>::try_get_child(&data_type)?.data_type();
let values_data_type = values.data_type();
if child_data_type != values_data_type {
return Err(Error::oos(
format!("ListArray's child's DataType must match. However, the expected DataType is {child_data_type:?} while it got {values_data_type:?}."),
));
}
Ok(Self {
data_type,
offsets,
values,
validity,
})
}

/// Creates a new empty [`MutableListArray`].
pub fn new() -> Self {
let values = M::default();
Expand Down
10 changes: 10 additions & 0 deletions src/array/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ pub struct MutableNullArray {
}

impl MutableNullArray {
/// Fallible constructor for [`MutableNullArray`].
///
/// Delegates all validation to [`NullArray::try_new`] and wraps the resulting array.
/// # Errors
/// Returns whatever error [`NullArray::try_new`] reports for the given `data_type` and
/// `length`.
pub fn try_new(data_type: DataType, length: usize) -> Result<Self, Error> {
    let inner = NullArray::try_new(data_type, length)?;
    Ok(Self { inner })
}

/// Returns a new [`MutableNullArray`].
/// # Panics
/// This function errors iff:
Expand Down
14 changes: 14 additions & 0 deletions src/array/primitive/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,20 @@ impl<T: NativeType> MutablePrimitiveArray<T> {
pub fn apply_values<F: Fn(&mut [T])>(&mut self, f: F) {
    // Hand the callback a mutable slice view over the values buffer.
    let values: &mut [T] = &mut self.values;
    f(values);
}

/// Returns how many slots of this array are null.
///
/// When the data type is [`DataType::Null`] every slot counts as null, so the array's
/// length is returned. Otherwise the result is the number of unset bits in the validity
/// bitmap, or `0` when there is no validity bitmap.
#[inline]
pub fn null_count(&self) -> usize {
    if *self.data_type() == DataType::Null {
        self.len()
    } else {
        match self.validity().as_ref() {
            Some(bitmap) => bitmap.unset_bits(),
            None => 0,
        }
    }
}
}

impl<T: NativeType> Default for MutablePrimitiveArray<T> {
Expand Down