Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Require HeapSize for ParquetValueType #51

Merged
merged 1 commit into from
Jul 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions parquet/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -586,9 +586,9 @@ pub(crate) mod private {
use crate::encodings::decoding::PlainDecoderDetails;
use crate::util::bit_util::{read_num_bytes, BitReader, BitWriter};

use crate::basic::Type;

use super::{ParquetError, Result, SliceAsBytes};
use crate::basic::Type;
use crate::file::metadata::HeapSize;

/// Sealed trait to start to remove specialisation from implementations
///
Expand All @@ -606,6 +606,7 @@ pub(crate) mod private {
+ SliceAsBytes
+ PartialOrd
+ Send
+ HeapSize
+ crate::encodings::decoding::private::GetDecoder
+ crate::file::statistics::private::MakeStatistics
{
Expand Down Expand Up @@ -654,13 +655,6 @@ pub(crate) mod private {

/// Return the value as a mutable Any to allow for downcasts without transmutation
fn as_mut_any(&mut self) -> &mut dyn std::any::Any;

/// Returns the number of bytes of memory this instance uses on the heap.
///
/// Defaults to none (0)
fn heap_size(&self) -> usize {
0
}
}

impl ParquetValueType for bool {
Expand Down Expand Up @@ -893,6 +887,12 @@ pub(crate) mod private {
}
}

/// `HeapSize` implementation for `Int96`.
///
/// Always reports zero; as the inline note says, this type has no heap
/// allocations to account for.
impl HeapSize for super::Int96 {
/// Returns the number of heap bytes attributed to this value, which is always `0`.
fn heap_size(&self) -> usize {
0 // no heap allocations
}
}

impl ParquetValueType for super::ByteArray {
const PHYSICAL_TYPE: Type = Type::BYTE_ARRAY;

Expand Down Expand Up @@ -975,7 +975,9 @@ pub(crate) mod private {
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
self
}
}

impl HeapSize for super::ByteArray {
fn heap_size(&self) -> usize {
// note: this is an estimate, not exact, so just return the size
// of the actual data used, don't try to handle the fact that it may
Expand Down Expand Up @@ -1068,7 +1070,9 @@ pub(crate) mod private {
fn as_mut_any(&mut self) -> &mut dyn std::any::Any {
self
}
}

impl HeapSize for super::FixedLenByteArray {
fn heap_size(&self) -> usize {
self.0.heap_size()
}
Expand Down
27 changes: 23 additions & 4 deletions parquet/src/file/metadata/memory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ use crate::format::{BoundaryOrder, PageLocation, SortingColumn};
use std::sync::Arc;

/// Trait for calculating the size of various containers
pub(crate) trait HeapSize {
pub trait HeapSize {
/// Return the size of any bytes allocated on the heap by this object,
/// including heap memory in those structures
///
Expand Down Expand Up @@ -176,11 +176,30 @@ impl<T: ParquetValueType> HeapSize for ValueStatistics<T> {
self.min().heap_size() + self.max().heap_size()
}
}
/// `HeapSize` implementation for `bool`.
///
/// Always reports zero; as the inline note says, a `bool` has no heap
/// allocations to account for.
impl HeapSize for bool {
/// Returns the number of heap bytes attributed to this value, which is always `0`.
fn heap_size(&self) -> usize {
0 // no heap allocations
}
}
/// `HeapSize` implementation for `i32`.
///
/// Always reports zero; as the inline note says, an `i32` has no heap
/// allocations to account for.
impl HeapSize for i32 {
/// Returns the number of heap bytes attributed to this value, which is always `0`.
fn heap_size(&self) -> usize {
0 // no heap allocations
}
}
/// `HeapSize` implementation for `i64`.
///
/// Always reports zero; as the inline note says, an `i64` has no heap
/// allocations to account for.
impl HeapSize for i64 {
/// Returns the number of heap bytes attributed to this value, which is always `0`.
fn heap_size(&self) -> usize {
0 // no heap allocations
}
}

// Note this impl gets most primitive types like bool, i32, etc
impl<T: ParquetValueType> HeapSize for T {
/// `HeapSize` implementation for `f32`.
///
/// Always reports zero; as the inline note says, an `f32` has no heap
/// allocations to account for.
impl HeapSize for f32 {
/// Returns the number of heap bytes attributed to this value, which is always `0`.
fn heap_size(&self) -> usize {
0 // no heap allocations
}
}
impl HeapSize for f64 {
fn heap_size(&self) -> usize {
self.heap_size()
0 // no heap allocations
}
}

Expand Down
Loading