diff --git a/parquet/src/column/writer/encoder.rs b/parquet/src/column/writer/encoder.rs
index 92d8f20d82f1..dc26b2049082 100644
--- a/parquet/src/column/writer/encoder.rs
+++ b/parquet/src/column/writer/encoder.rs
@@ -15,7 +15,9 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use crate::basic::{Encoding, Type};
+use half::f16;
+
+use crate::basic::{Encoding, LogicalType, Type};
 use crate::bloom_filter::Sbbf;
 use crate::column::writer::{
     compare_greater, fallback_encoding, has_dictionary_support, is_nan, update_max, update_min,
@@ -317,14 +319,14 @@ where
     //
     // For max, it has similar logic but will be written as 0.0
     // (positive zero)
-    let min = replace_zero(min, -0.0);
-    let max = replace_zero(max, 0.0);
+    let min = replace_zero(min, descr, -0.0);
+    let max = replace_zero(max, descr, 0.0);
 
     Some((min, max))
 }
 
 #[inline]
-fn replace_zero<T: ParquetValueType>(val: &T, replace: f32) -> T {
+fn replace_zero<T: ParquetValueType>(val: &T, descr: &ColumnDescriptor, replace: f32) -> T {
     match T::PHYSICAL_TYPE {
         Type::FLOAT if f32::from_le_bytes(val.as_bytes().try_into().unwrap()) == 0.0 => {
             T::try_from_le_slice(&f32::to_le_bytes(replace)).unwrap()
@@ -332,6 +334,12 @@ fn replace_zero<T: ParquetValueType>(val: &T, replace: f32) -> T {
         Type::DOUBLE if f64::from_le_bytes(val.as_bytes().try_into().unwrap()) == 0.0 => {
             T::try_from_le_slice(&f64::to_le_bytes(replace as f64)).unwrap()
         }
+        Type::FIXED_LEN_BYTE_ARRAY
+            if descr.logical_type() == Some(LogicalType::Float16)
+                && f16::from_le_bytes(val.as_bytes().try_into().unwrap()) == f16::NEG_ZERO =>
+        {
+            T::try_from_le_slice(&f16::to_le_bytes(f16::from_f32(replace))).unwrap()
+        }
         _ => val.clone(),
     }
 }
diff --git a/parquet/src/column/writer/mod.rs b/parquet/src/column/writer/mod.rs
index 60427c9d332c..ceaa6996794c 100644
--- a/parquet/src/column/writer/mod.rs
+++ b/parquet/src/column/writer/mod.rs
@@ -2170,6 +2170,62 @@ mod tests {
         assert!(stats.is_min_max_backwards_compatible());
     }
 
+    #[test]
+    fn test_float16_statistics_zero_only() {
+        let input = [f16::ZERO]
+            .into_iter()
+            .map(|s| ByteArray::from(s).into())
+            .collect::<Vec<_>>();
+
+        let stats = float16_statistics_roundtrip(&input);
+        assert!(stats.has_min_max_set());
+        assert!(stats.is_min_max_backwards_compatible());
+        assert_eq!(stats.min(), &ByteArray::from(f16::NEG_ZERO));
+        assert_eq!(stats.max(), &ByteArray::from(f16::ZERO));
+    }
+
+    #[test]
+    fn test_float16_statistics_neg_zero_only() {
+        let input = [f16::NEG_ZERO]
+            .into_iter()
+            .map(|s| ByteArray::from(s).into())
+            .collect::<Vec<_>>();
+
+        let stats = float16_statistics_roundtrip(&input);
+        assert!(stats.has_min_max_set());
+        assert!(stats.is_min_max_backwards_compatible());
+        assert_eq!(stats.min(), &ByteArray::from(f16::NEG_ZERO));
+        assert_eq!(stats.max(), &ByteArray::from(f16::ZERO));
+    }
+
+    #[test]
+    fn test_float16_statistics_zero_min() {
+        let input = [f16::ZERO, f16::ONE, f16::NAN, f16::PI]
+            .into_iter()
+            .map(|s| ByteArray::from(s).into())
+            .collect::<Vec<_>>();
+
+        let stats = float16_statistics_roundtrip(&input);
+        assert!(stats.has_min_max_set());
+        assert!(stats.is_min_max_backwards_compatible());
+        assert_eq!(stats.min(), &ByteArray::from(f16::NEG_ZERO));
+        assert_eq!(stats.max(), &ByteArray::from(f16::PI));
+    }
+
+    #[test]
+    fn test_float16_statistics_neg_zero_max() {
+        let input = [f16::NEG_ZERO, f16::NEG_ONE, f16::NAN, -f16::PI]
+            .into_iter()
+            .map(|s| ByteArray::from(s).into())
+            .collect::<Vec<_>>();
+
+        let stats = float16_statistics_roundtrip(&input);
+        assert!(stats.has_min_max_set());
+        assert!(stats.is_min_max_backwards_compatible());
+        assert_eq!(stats.min(), &ByteArray::from(-f16::PI));
+        assert_eq!(stats.max(), &ByteArray::from(f16::ZERO));
+    }
+
     #[test]
     fn test_float_statistics_nan_middle() {
         let stats = statistics_roundtrip::<FloatType>(&[1.0, f32::NAN, 2.0]);