diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index 511a8375d..84992d1f9 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -817,6 +817,7 @@ message Expression { int64 time = 17; IntervalYearToMonth interval_year_to_month = 19; IntervalDayToSecond interval_day_to_second = 20; + IntervalCompound interval_compound = 36; string fixed_char = 21; VarChar var_char = 22; bytes fixed_binary = 23; @@ -888,7 +889,21 @@ message Expression { message IntervalDayToSecond { int32 days = 1; int32 seconds = 2; - int32 microseconds = 3; + + // Consumers should expect either (microseconds) to be set or (precision and subseconds) to be set + oneof precision_mode { + int32 microseconds = 3 [deprecated = true]; // use precision and subseconds below, they cover and replace microseconds. + // Sub-second precision, 0 means the value given is in seconds, 3 is milliseconds, 6 microseconds, 9 is nanoseconds. Should be used with subseconds below. + int32 precision = 4; + } + + // the number of fractional seconds using 1e(-precision) units. Should only be used with precision field, not microseconds. 
+ int64 subseconds = 5; + } + + message IntervalCompound { + IntervalYearToMonth interval_year_to_month = 1; + IntervalDayToSecond interval_day_to_second = 2; } message Struct { diff --git a/proto/substrait/parameterized_types.proto b/proto/substrait/parameterized_types.proto index 51d9c0d68..06462993f 100644 --- a/proto/substrait/parameterized_types.proto +++ b/proto/substrait/parameterized_types.proto @@ -26,7 +26,8 @@ message ParameterizedType { Type.Date date = 16; Type.Time time = 17; Type.IntervalYear interval_year = 19; - Type.IntervalDay interval_day = 20; + ParameterizedIntervalDay interval_day = 20; + ParameterizedIntervalCompound interval_compound = 36; // Deprecated in favor of `ParameterizedPrecisionTimestampTZ precision_timestamp_tz` Type.TimestampTZ timestamp_tz = 29 [deprecated = true]; Type.UUID uuid = 32; @@ -92,6 +93,18 @@ message ParameterizedType { Type.Nullability nullability = 4; } + message ParameterizedIntervalDay { + IntegerOption precision = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ParameterizedIntervalCompound { + IntegerOption precision = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + message ParameterizedPrecisionTimestamp { IntegerOption precision = 1; uint32 variation_pointer = 2; diff --git a/proto/substrait/type.proto b/proto/substrait/type.proto index 431d2158e..19e49588a 100644 --- a/proto/substrait/type.proto +++ b/proto/substrait/type.proto @@ -27,6 +27,7 @@ message Type { Time time = 17; IntervalYear interval_year = 19; IntervalDay interval_day = 20; + IntervalCompound interval_compound = 35; // Deprecated in favor of `PrecisionTimestampTZ precision_timestamp_tz` TimestampTZ timestamp_tz = 29 [deprecated = true]; UUID uuid = 32; @@ -122,14 +123,28 @@ message Type { Nullability nullability = 2; } + // An interval consisting of years and months message IntervalYear { uint32 type_variation_reference = 1; Nullability nullability = 2; } + // An interval 
consisting of days, seconds, and microseconds message IntervalDay { uint32 type_variation_reference = 1; Nullability nullability = 2; + + // Sub-second precision, 0 means the value given is in seconds, 3 is milliseconds, 6 microseconds, 9 is nanoseconds, etc. + // if unset, treat as 6. + optional int32 precision = 3; + } + + // An interval consisting of the components of both IntervalYear and IntervalDay + message IntervalCompound { + uint32 type_variation_reference = 1; + Nullability nullability = 2; + // Sub-second precision, 0 means the value given is in seconds, 3 is milliseconds, 6 microseconds, 9 is nanoseconds, etc. + int32 precision = 3; } message UUID { diff --git a/proto/substrait/type_expressions.proto b/proto/substrait/type_expressions.proto index 6b59121d9..8ba4b902b 100644 --- a/proto/substrait/type_expressions.proto +++ b/proto/substrait/type_expressions.proto @@ -26,11 +26,12 @@ message DerivationExpression { Type.Date date = 16; Type.Time time = 17; Type.IntervalYear interval_year = 19; - Type.IntervalDay interval_day = 20; // Deprecated in favor of `ExpressionPrecisionTimestampTZ precision_timestamp_tz` Type.TimestampTZ timestamp_tz = 29 [deprecated = true]; Type.UUID uuid = 32; + ExpressionIntervalDay interval_day = 20; + ExpressionIntervalCompound interval_compound = 42; ExpressionFixedChar fixed_char = 21; ExpressionVarChar varchar = 22; ExpressionFixedBinary fixed_binary = 23; @@ -90,6 +91,18 @@ message DerivationExpression { Type.Nullability nullability = 3; } + message ExpressionIntervalDay { + DerivationExpression precision = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + + message ExpressionIntervalCompound { + DerivationExpression precision = 1; + uint32 variation_pointer = 2; + Type.Nullability nullability = 3; + } + message ExpressionPrecisionTimestampTZ { DerivationExpression precision = 1; uint32 variation_pointer = 2; diff --git a/site/docs/extensions/index.md b/site/docs/extensions/index.md index 
9a3129aaf..bba436788 100644 --- a/site/docs/extensions/index.md +++ b/site/docs/extensions/index.md @@ -76,6 +76,7 @@ Rather than using a full data type representation, the input argument types (`sh | time | time | | interval_year | iyear | | interval_day | iday | +| interval_compound | icompound | | uuid | uuid | | fixedchar<N> | fchar | | varchar<N> | vchar | diff --git a/site/docs/types/type_classes.md b/site/docs/types/type_classes.md index 26233493a..bea7adeb9 100644 --- a/site/docs/types/type_classes.md +++ b/site/docs/types/type_classes.md @@ -24,7 +24,6 @@ Simple type classes are those that don't support any form of configuration. For | date | A date within [1000-01-01..9999-12-31]. | `int32` days since `1970-01-01` | time | A time since the beginning of any day. Range of [0..86,399,999,999] microseconds; leap seconds need not be supported. | `int64` microseconds past midnight | interval_year | Interval year to month. Supports a range of [-10,000..10,000] years with month precision (= [-120,000..120,000] months). Usually stored as separate integers for years and months, but only the total number of months is significant, i.e. `1y 0m` is considered equal to `0y 12m` or `1001y -12000m`. | `int32` years and `int32` months, with the added constraint that each component can never independently specify more than 10,000 years, even if the components have opposite signs (e.g. `-10000y 200000m` is **not** allowed) -| interval_day | Interval day to second. Supports a range of [-3,650,000..3,650,000] days with microsecond precision (= [-315,360,000,000,000,000..315,360,000,000,000,000] microseconds). Usually stored as separate integers for various components, but only the total number of microseconds is significant, i.e. `1d 0s` is considered equal to `0d 86400s`. 
| `int32` days, `int32` seconds, and `int32` microseconds, with the added constraint that each component can never independently specify more than 10,000 years, even if the components have opposite signs (e.g. `3650001d -86400s 0us` is **not** allowed) | uuid | A universally-unique identifier composed of 128 bits. Typically presented to users in the following hexadecimal format: `c48ffa9e-64f4-44cb-ae47-152b4e60e77b`. Any 128-bit value is allowed, without specific adherence to RFC4122. | 16-byte `binary` ## Compound Types @@ -43,6 +42,8 @@ Compound type classes are type classes that need to be configured by means of a | MAP<K, V> | An unordered list of type K keys with type V values. Keys may be repeated. While the key type could be nullable, keys may not be null. | `repeated KeyValue` (in turn two `Literal`s), all key types matching K and all value types matching V | PRECISIONTIMESTAMP<P> | A timestamp with fractional second precision (P, number of digits) 0 <= P <= 9. Does not include timezone information and can thus not be unambiguously mapped to a moment on the timeline without context. Similar to naive datetime in Python. | `int64` seconds, milliseconds, microseconds or nanoseconds since 1970-01-01 00:00:00.000000000 (in an unspecified timezone) | PRECISIONTIMESTAMPTZ<P> | A timezone-aware timestamp, with fractional second precision (P, number of digits) 0 <= P <= 9. Similar to aware datetime in Python. | `int64` seconds, milliseconds, microseconds or nanoseconds since 1970-01-01 00:00:00.000000000 UTC +| INTERVAL_DAY<P> | Interval day to second. Supports a range of [-3,650,000..3,650,000] days with fractional second precision (P, number of digits) 0 <= P <= 9. Usually stored as separate integers for various components, but only the total number of fractional seconds is significant, i.e. `1d 0s` is considered equal to `0d 86400s`. 
| `int32` days, `int32` seconds, and `int64` fractional seconds, with the added constraint that each component can never independently specify more than 10,000 years, even if the components have opposite signs (e.g. `3650001d -86400s 0us` is **not** allowed) +| INTERVAL_COMPOUND<P> | A compound interval type that combines the components and rules of both interval_year and interval_day to express arbitrary durations across multiple grains. Substrait gives no definition for the conversion of values between independent grains (e.g. months to days). | An `IntervalYearToMonth` value together with an `IntervalDayToSecond` value ## User-Defined Types