From df8366d3a690a8f4f618fc0d5b37229e5a052159 Mon Sep 17 00:00:00 2001 From: Aryansh Omray Date: Mon, 6 Nov 2023 17:02:22 +0530 Subject: [PATCH 1/5] add try_new for MutableDictionaryArray --- src/array/dictionary/mutable.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/array/dictionary/mutable.rs b/src/array/dictionary/mutable.rs index b48a57a945..fe2a32a5a9 100644 --- a/src/array/dictionary/mutable.rs +++ b/src/array/dictionary/mutable.rs @@ -2,6 +2,7 @@ use std::hash::Hash; use std::sync::Arc; use crate::array::indexable::{AsIndexed, Indexable}; +use crate::array::specification::{check_indexes, check_indexes_unchecked}; use crate::{ array::{primitive::MutablePrimitiveArray, Array, MutableArray, TryExtend, TryPush}, bitmap::MutableBitmap, @@ -63,6 +64,32 @@ impl Default for MutableDictionaryA } impl MutableDictionaryArray { + /// Creates a [`MutableDictionaryArray`] from a given keys array and values array. + /// # Errors + /// Errors if the values array has duplicates or if the keys are out of bounds of the values array. + pub fn try_new(data_type: DataType, keys: MutablePrimitiveArray, values: M) -> Result + where + M: Indexable, + M::Type: Eq + Hash, + { + if keys.null_count() != keys.len() { + if K::always_fits_usize() { + // safety: we just checked that conversion to `usize` always + // succeeds + unsafe { check_indexes_unchecked(keys.values(), values.len()) }?; + } else { + check_indexes(keys.values(), values.len())?; + } + } + + let map = ValueMap::::from_values(values)?; + Ok(Self { + data_type, + map, + keys, + }) + } + /// Creates an empty [`MutableDictionaryArray`] from a given empty values array. /// # Errors /// Errors if the array is non-empty. 
From 5bc4f1993cb3658590e16c434050e1d3e9c2da5d Mon Sep 17 00:00:00 2001 From: Aryansh Omray Date: Mon, 6 Nov 2023 17:02:32 +0530 Subject: [PATCH 2/5] add try_new for MutableFixedSizeListArray --- src/array/fixed_size_list/mutable.rs | 51 ++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/array/fixed_size_list/mutable.rs b/src/array/fixed_size_list/mutable.rs index 1e387a2f70..51dfbfe2e8 100644 --- a/src/array/fixed_size_list/mutable.rs +++ b/src/array/fixed_size_list/mutable.rs @@ -33,6 +33,57 @@ impl From> for FixedSizeListArray } impl MutableFixedSizeListArray { + /// Creates a new [`MutableFixedSizeListArray`]. + /// + /// # Errors + /// This function returns an error iff: + /// * The `data_type`'s physical type is not [`crate::datatypes::PhysicalType::FixedSizeList`] + /// * The `data_type`'s inner field's data type is not equal to `values.data_type`. + /// * The length of `values` is not a multiple of `size` in `data_type` + /// * the validity's length is not equal to `values.len() / size`. + pub fn try_new( + data_type: DataType, + values: M, + validity: Option, + ) -> Result { + let (child, size) = FixedSizeListArray::try_child_and_size(&data_type)?; + + let child_data_type = &child.data_type; + let values_data_type = values.data_type(); + if child_data_type != values_data_type { + return Err(Error::oos( + format!( + "MutableFixedSizeListArray's child's DataType must match. 
However, the expected DataType is {child_data_type:?} while it got {values_data_type:?}.", + ) + )); + } + + if values.len() % size != 0 { + return Err(Error::oos(format!( + "values (of len {}) must be a multiple of size ({}) in MutableFixedSizeListArray.", + values.len(), + size + ))); + } + let len = values.len() / size; + + if validity + .as_ref() + .map_or(false, |validity| validity.len() != len) + { + return Err(Error::oos( + "validity mask length must be equal to the number of values divided by size", + )); + } + + Ok(Self { + data_type, + size, + values, + validity, + }) + } + /// Creates a new [`MutableFixedSizeListArray`] from a [`MutableArray`] and size. pub fn new(values: M, size: usize) -> Self { let data_type = FixedSizeListArray::default_datatype(values.data_type().clone(), size); From f943511f1201a91babec00cf3552b5726e817ea3 Mon Sep 17 00:00:00 2001 From: Aryansh Omray Date: Mon, 6 Nov 2023 17:02:49 +0530 Subject: [PATCH 3/5] add try_new for MutableListArray --- src/array/list/mutable.rs | 44 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/src/array/list/mutable.rs b/src/array/list/mutable.rs index d24475e86d..0e4af4a2ae 100644 --- a/src/array/list/mutable.rs +++ b/src/array/list/mutable.rs @@ -12,7 +12,7 @@ use crate::{ trusted_len::TrustedLen, }; -use super::ListArray; +use super::{try_check_offsets_bounds, ListArray}; /// The mutable version of [`ListArray`]. #[derive(Debug, Clone)] @@ -24,6 +24,48 @@ pub struct MutableListArray { } impl MutableListArray { + /// Creates a new [`MutableListArray`]. + /// + /// # Errors + /// This function returns an error iff: + /// * The last offset is not equal to the values' length. + /// * The validity's length is not equal to `offsets.len_proxy()`. + /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to either [`crate::datatypes::PhysicalType::List`] or [`crate::datatypes::PhysicalType::LargeList`]. 
+ /// * The `data_type`'s inner field's data type is not equal to `values.data_type`. + /// # Implementation + /// This function is `O(1)` + pub fn try_new( + data_type: DataType, + offsets: Offsets, + values: M, + validity: Option, + ) -> Result { + try_check_offsets_bounds(&offsets, values.len())?; + + if validity + .as_ref() + .map_or(false, |validity| validity.len() != offsets.len_proxy()) + { + return Err(Error::oos( + "validity mask length must match the number of values", + )); + } + + let child_data_type = ListArray::::try_get_child(&data_type)?.data_type(); + let values_data_type = values.data_type(); + if child_data_type != values_data_type { + return Err(Error::oos( + format!("ListArray's child's DataType must match. However, the expected DataType is {child_data_type:?} while it got {values_data_type:?}."), + )); + } + Ok(Self { + data_type, + offsets, + values, + validity, + }) + } + /// Creates a new empty [`MutableListArray`]. pub fn new() -> Self { let values = M::default(); From 747005eb25350bdc5471e2a481c6e2f7b50d4ee7 Mon Sep 17 00:00:00 2001 From: Aryansh Omray Date: Mon, 6 Nov 2023 17:03:00 +0530 Subject: [PATCH 4/5] add try_new for MutableNullArray --- src/array/null.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/array/null.rs b/src/array/null.rs index bcd5c0aff7..5a23b8f5ec 100644 --- a/src/array/null.rs +++ b/src/array/null.rs @@ -99,6 +99,16 @@ pub struct MutableNullArray { } impl MutableNullArray { + /// Returns a new [`MutableNullArray`]. + /// # Errors + /// This function errors iff: + /// * The `data_type`'s [`crate::datatypes::PhysicalType`] is not equal to [`crate::datatypes::PhysicalType::Null`]. + pub fn try_new(data_type: DataType, length: usize) -> Result { + Ok(Self { + inner: NullArray::try_new(data_type, length)?, + }) + } + /// Returns a new [`MutableNullArray`]. 
/// # Panics /// This function errors iff: From ac5f5d7a9aac6519a2b0cd7bd5400bb3c403bc10 Mon Sep 17 00:00:00 2001 From: Aryansh Omray Date: Mon, 6 Nov 2023 18:11:52 +0530 Subject: [PATCH 5/5] add null_count for MutablePrimitiveArray --- src/array/primitive/mutable.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/array/primitive/mutable.rs b/src/array/primitive/mutable.rs index 4432ab2e33..9f71e066fc 100644 --- a/src/array/primitive/mutable.rs +++ b/src/array/primitive/mutable.rs @@ -93,6 +93,20 @@ impl MutablePrimitiveArray { pub fn apply_values(&mut self, f: F) { f(&mut self.values); } + + /// The number of null slots on this [`Array`]. + /// # Implementation + /// This is `O(1)` since the number of null elements is pre-computed. + #[inline] + pub fn null_count(&self) -> usize { + if self.data_type() == &DataType::Null { + return self.len(); + }; + self.validity() + .as_ref() + .map(|x| x.unset_bits()) + .unwrap_or(0) + } } impl Default for MutablePrimitiveArray {