Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support 'col IN (a, b, c)' type expressions #652

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
42 changes: 21 additions & 21 deletions kernel/src/engine/arrow_expression.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Expression handling based on arrow-rs compute kernels.
use std::borrow::Borrow;
use std::cmp::Ordering;
use std::collections::HashMap;
use std::sync::Arc;

Expand Down Expand Up @@ -290,9 +291,9 @@ fn evaluate_expression(
}

fn str_op<'a>(
column: impl Iterator<Item = Option<&'a str>> + 'a,
column: impl IntoIterator<Item = Option<&'a str>> + 'a,
) -> impl Iterator<Item = Option<Scalar>> + 'a {
column.map(|v| v.map(Scalar::from))
column.into_iter().map(|v| v.map(Scalar::from))
}

fn op_in(
Expand All @@ -303,15 +304,12 @@ fn evaluate_expression(
// it as such, ensuring correct handling of NULL inputs (including `Scalar::Null`).
values
.map(|v| {
Some(
PredicateEvaluatorDefaults::finish_eval_variadic(
VariadicOperator::Or,
inlist.iter().map(|k| v.as_ref().map(|vv| vv == k)),
false,
)
// None is returned when no dominant value (true) is found and there is at least one NULL
// In th case of IN, this is equivalent to false
.unwrap_or(false),
PredicateEvaluatorDefaults::finish_eval_variadic(
VariadicOperator::Or,
inlist
.iter()
.map(|k| Some(v.as_ref()?.partial_cmp(k)? == Ordering::Equal)),
false,
)
})
.collect()
Expand All @@ -333,16 +331,18 @@ fn evaluate_expression(

// safety: as_* methods on arrow arrays can panic, but we checked the data type before applying.
let arr = match (column.data_type(), data_type) {
(ArrowDataType::Utf8, PrimitiveType::String) => op_in(inlist, str_op(column.as_string::<i32>().iter())),
(ArrowDataType::LargeUtf8, PrimitiveType::String) => op_in(inlist, str_op(column.as_string::<i64>().iter())),
(ArrowDataType::Utf8View, PrimitiveType::String) => op_in(inlist, str_op(column.as_string_view().iter())),
(ArrowDataType::Int8, PrimitiveType::Byte) => op_in(inlist,op::<Int8Type>( column.as_ref(), Scalar::from)),
(ArrowDataType::Int16, PrimitiveType::Short) => op_in(inlist,op::<Int16Type>(column.as_ref(), Scalar::from)),
(ArrowDataType::Int32, PrimitiveType::Integer) => op_in(inlist,op::<Int32Type>(column.as_ref(), Scalar::from)),
(ArrowDataType::Int64, PrimitiveType::Long) => op_in(inlist,op::<Int64Type>(column.as_ref(), Scalar::from)),
(ArrowDataType::Float32, PrimitiveType::Float) => op_in(inlist,op::<Float32Type>(column.as_ref(), Scalar::from)),
(ArrowDataType::Float64, PrimitiveType::Double) => op_in(inlist,op::<Float64Type>(column.as_ref(), Scalar::from)),
(ArrowDataType::Date32, PrimitiveType::Date) => op_in(inlist,op::<Date32Type>(column.as_ref(), Scalar::Date)),
(ArrowDataType::Utf8, PrimitiveType::String) => op_in(inlist, str_op(column.as_string::<i32>())),
(ArrowDataType::LargeUtf8, PrimitiveType::String) => op_in(inlist, str_op(column.as_string::<i64>())),
(ArrowDataType::Utf8View, PrimitiveType::String) => op_in(inlist, str_op(column.as_string_view())),
(ArrowDataType::Int8, PrimitiveType::Byte) => op_in(inlist,op::<Int8Type>( &column, Scalar::from)),
(ArrowDataType::Int16, PrimitiveType::Short) => op_in(inlist,op::<Int16Type>(&column, Scalar::from)),
(ArrowDataType::Int32, PrimitiveType::Integer) => op_in(inlist,op::<Int32Type>(&column, Scalar::from)),
(ArrowDataType::Int64, PrimitiveType::Long) => op_in(inlist,op::<Int64Type>(&column, Scalar::from)),
(ArrowDataType::Float32, PrimitiveType::Float) => op_in(inlist,op::<Float32Type>(&column, Scalar::from)),
(ArrowDataType::Float64, PrimitiveType::Double) => {op_in(inlist,op::<Float64Type>(&column, Scalar::from))},
(ArrowDataType::Date32, PrimitiveType::Date) => {
op_in(inlist,op::<Date32Type>(&column, Scalar::Date))
},
(
ArrowDataType::Timestamp(TimeUnit::Microsecond, Some(_)),
PrimitiveType::Timestamp,
Expand Down
48 changes: 9 additions & 39 deletions kernel/src/expressions/scalars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,45 +224,9 @@ impl Display for Scalar {
}
}

impl PartialEq<Scalar> for Scalar {
fn eq(&self, other: &Self) -> bool {
use Scalar::*;
// NOTE: We intentionally do two match arms for each variant to avoid a catch-all, so
// that new variants trigger compilation failures instead of being silently ignored.
match (self, other) {
(Integer(a), Integer(b)) => a == b,
(Integer(_), _) => false,
(Long(a), Long(b)) => a == b,
(Long(_), _) => false,
(Short(a), Short(b)) => a == b,
(Short(_), _) => false,
(Byte(a), Byte(b)) => a == b,
(Byte(_), _) => false,
(Float(a), Float(b)) => a == b,
(Float(_), _) => false,
(Double(a), Double(b)) => a == b,
(Double(_), _) => false,
(String(a), String(b)) => a == b,
(String(_), _) => false,
(Boolean(a), Boolean(b)) => a == b,
(Boolean(_), _) => false,
(Timestamp(a), Timestamp(b)) => a == b,
(Timestamp(_), _) => false,
(TimestampNtz(a), TimestampNtz(b)) => a == b,
(TimestampNtz(_), _) => false,
(Date(a), Date(b)) => a == b,
(Date(_), _) => false,
(Binary(a), Binary(b)) => a == b,
(Binary(_), _) => false,
(Decimal(a, _, _), Decimal(b, _, _)) => a == b,
(Decimal(_, _, _), _) => false,
(Struct(a), Struct(b)) => a == b,
(Struct(_), _) => false,
(Array(a), Array(b)) => a == b,
(Array(_), _) => false,
(Null(_), Null(_)) => false, // NOTE: NULL values are incomparable by definition
(Null(_), _) => false,
}
impl PartialEq for Scalar {
fn eq(&self, other: &Scalar) -> bool {
self.partial_cmp(other) == Some(Ordering::Equal)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aside: Interesting, PartialOrd requires Self: PartialEq but we can still invoke the former from the latter?
(feels somehow like a circular dependency, but I guess if it compiles it works)

}
}

Expand Down Expand Up @@ -676,10 +640,13 @@ mod tests {
fn test_partial_cmp() {
let a = Scalar::Integer(1);
let b = Scalar::Integer(2);
roeap marked this conversation as resolved.
Show resolved Hide resolved
let c = Scalar::Null(DataType::INTEGER);
assert_eq!(a.partial_cmp(&b), Some(Ordering::Less));
assert_eq!(b.partial_cmp(&a), Some(Ordering::Greater));
assert_eq!(a.partial_cmp(&a), Some(Ordering::Equal));
assert_eq!(b.partial_cmp(&b), Some(Ordering::Equal));
assert_eq!(a.partial_cmp(&c), None);
assert_eq!(c.partial_cmp(&a), None);

// assert that NULL values are incomparable
let null = Scalar::Null(DataType::INTEGER);
Expand All @@ -690,8 +657,11 @@ mod tests {
fn test_partial_eq() {
let a = Scalar::Integer(1);
let b = Scalar::Integer(2);
let c = Scalar::Null(DataType::INTEGER);
assert!(!a.eq(&b));
assert!(a.eq(&a));
assert!(!a.eq(&c));
assert!(!c.eq(&a));

// assert that NULL values are incomparable
let null = Scalar::Null(DataType::INTEGER);
Expand Down
Loading