From 91acfb07a9929a2d6721c5417e47c0c472372a86 Mon Sep 17 00:00:00 2001
From: Raphael Taylor-Davies <1781103+tustvold@users.noreply.github.com>
Date: Mon, 6 Nov 2023 14:01:47 +0000
Subject: [PATCH] Fix serialization of large integers (#5038) (#5042)

---
 arrow-json/src/reader/mod.rs             | 30 ++++++++++++++++++++++++
 arrow-json/src/reader/primitive_array.rs |  2 +-
 arrow-json/src/reader/tape.rs            |  2 +-
 arrow-json/src/reader/timestamp_array.rs |  4 +++-
 4 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/arrow-json/src/reader/mod.rs b/arrow-json/src/reader/mod.rs
index 28282c4d1541..71a73df9fedb 100644
--- a/arrow-json/src/reader/mod.rs
+++ b/arrow-json/src/reader/mod.rs
@@ -2229,4 +2229,34 @@ mod tests {
         let values = b.column(0).as_primitive::<Int32Type>().values();
         assert_eq!(values, &[1, 2, 3, 4]);
     }
+
+    #[test]
+    fn test_serde_large_numbers() {
+        let field = Field::new("int", DataType::Int64, true);
+        let mut decoder = ReaderBuilder::new_with_field(field)
+            .build_decoder()
+            .unwrap();
+
+        decoder.serialize(&[1699148028689_u64, 2, 3, 4]).unwrap();
+        let b = decoder.flush().unwrap().unwrap();
+        let values = b.column(0).as_primitive::<Int64Type>().values();
+        assert_eq!(values, &[1699148028689, 2, 3, 4]);
+
+        let field = Field::new(
+            "int",
+            DataType::Timestamp(TimeUnit::Microsecond, None),
+            true,
+        );
+        let mut decoder = ReaderBuilder::new_with_field(field)
+            .build_decoder()
+            .unwrap();
+
+        decoder.serialize(&[1699148028689_u64, 2, 3, 4]).unwrap();
+        let b = decoder.flush().unwrap().unwrap();
+        let values = b
+            .column(0)
+            .as_primitive::<TimestampMicrosecondType>()
+            .values();
+        assert_eq!(values, &[1699148028689, 2, 3, 4]);
+    }
 }
diff --git a/arrow-json/src/reader/primitive_array.rs b/arrow-json/src/reader/primitive_array.rs
index a03a41e96dcb..1bd1176131ae 100644
--- a/arrow-json/src/reader/primitive_array.rs
+++ b/arrow-json/src/reader/primitive_array.rs
@@ -142,7 +142,7 @@ where
                 },
                 TapeElement::I64(high) => match tape.get(p + 1) {
                     TapeElement::I32(low) => {
-                        let v = (high as i64) << 32 | low as i64;
+                        let v = (high as i64) << 32 | (low as u32) as i64;
                         let value = NumCast::from(v).ok_or_else(|| {
                             ArrowError::JsonError(format!("failed to parse {v} as {d}",))
                         })?;
diff --git a/arrow-json/src/reader/tape.rs b/arrow-json/src/reader/tape.rs
index 4822ad0bf43d..c783f6a51022 100644
--- a/arrow-json/src/reader/tape.rs
+++ b/arrow-json/src/reader/tape.rs
@@ -180,7 +180,7 @@ impl<'a> Tape<'a> {
             TapeElement::Null => out.push_str("null"),
             TapeElement::I64(high) => match self.get(idx + 1) {
                 TapeElement::I32(low) => {
-                    let val = (high as i64) << 32 | low as i64;
+                    let val = (high as i64) << 32 | (low as u32) as i64;
                     let _ = write!(out, "{val}");
                     return idx + 2;
                 }
diff --git a/arrow-json/src/reader/timestamp_array.rs b/arrow-json/src/reader/timestamp_array.rs
index dda5a653d730..f68fc3dc3270 100644
--- a/arrow-json/src/reader/timestamp_array.rs
+++ b/arrow-json/src/reader/timestamp_array.rs
@@ -96,7 +96,9 @@ where
                 }
                 TapeElement::I32(v) => builder.append_value(v as i64),
                 TapeElement::I64(high) => match tape.get(p + 1) {
-                    TapeElement::I32(low) => builder.append_value((high as i64) << 32 | low as i64),
+                    TapeElement::I32(low) => {
+                        builder.append_value((high as i64) << 32 | (low as u32) as i64)
+                    }
                     _ => unreachable!(),
                 },
                 _ => return Err(tape.error(*p, "primitive")),