From bd4b431c8c900580f944807f654bc829cdb5dc8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jes=C3=BAs=20Camacho=20Rodr=C3=ADguez?= Date: Tue, 10 Dec 2024 01:42:45 -0800 Subject: [PATCH] feat: add expression support for count and offset in the fetch operator (#748) * Update FetchRel in Substrait to support expressions for both offset and count. This is done in a way that should be effectively compatible with old systems. Old plans should be able to continue to be consumed treating the old values as int64. BREAKING CHANGE: The encoding of FetchRel has changed in a strictly backwards incompatible way. The change involves transitioning offset and count from a standalone int64 field to a oneof structure, where the original int64 field is marked as deprecated, and a new field of Expression type is introduced. Using a oneof may cause ambiguity between unset and set-to-zero states in older messages. However, the fields are defined such that their logical meaning remains indistinguishable, ensuring consistency across encodings. --- proto/substrait/algebra.proto | 33 ++++++++++++++++++++---- site/docs/relations/logical_relations.md | 10 +++---- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/proto/substrait/algebra.proto b/proto/substrait/algebra.proto index a3b9df7fb..80e88ded1 100644 --- a/proto/substrait/algebra.proto +++ b/proto/substrait/algebra.proto @@ -267,11 +267,34 @@ message CrossRel { message FetchRel { RelCommon common = 1; Rel input = 2; - // the offset expressed in number of records - int64 offset = 3; - // the amount of records to return - // use -1 to signal that ALL records should be returned - int64 count = 4; + // Note: A oneof field is inherently optional, whereas individual fields + // within a oneof cannot be marked as optional. The unset state of offset + // should therefore be checked at the oneof level. Unset is treated as 0. + oneof offset_mode { + // the offset expressed in number of records + // Deprecated: use `offset_expr` instead + int64 offset = 3 [deprecated = true]; + // Expression evaluated into a non-negative integer specifying the number + // of records to skip. An expression evaluating to null is treated as 0. + // Evaluating to a negative integer should result in an error. + // Recommended type for offset is int64. + Expression offset_expr = 5; + } + // Note: A oneof field is inherently optional, whereas individual fields + // within a oneof cannot be marked as optional. The unset state of count + // should therefore be checked at the oneof level. Unset is treated as ALL. + oneof count_mode { + // the amount of records to return + // use -1 to signal that ALL records should be returned + // Deprecated: use `count_expr` instead + int64 count = 4 [deprecated = true]; + // Expression evaluated into a non-negative integer specifying the number + // of records to return. An expression evaluating to null signals that ALL + // records should be returned. + // Evaluating to a negative integer should result in an error. + // Recommended type for count is int64. + Expression count_expr = 6; + } substrait.extensions.AdvancedExtension advanced_extension = 10; } diff --git a/site/docs/relations/logical_relations.md b/site/docs/relations/logical_relations.md index 9fd5d600f..e8c0014a3 100644 --- a/site/docs/relations/logical_relations.md +++ b/site/docs/relations/logical_relations.md @@ -338,11 +338,11 @@ The fetch operation eliminates records outside a desired window. Typically corre ### Fetch Properties -| Property | Description | Required | -| -------- | --------------------------------------------------------------------- | ------------------------ | -| Input | A relational input, typically with a desired orderedness property. | Required | -| Offset | A non-negative integer. Declares the offset for retrieval of records. | Optional, defaults to 0. | -| Count | A non-negative integer or -1. Declares the number of records that should be returned. -1 signals that ALL records should be returned. | Required | +| Property | Description | Required | +| ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -------------------------- | +| Input | A relational input, typically with a desired orderedness property. | Required | +| Offset Expression | An expression which evaluates to a non-negative integer or null (recommended type is `i64`). Declares the offset for retrieval of records. An expression evaluating to null is treated as 0. | Optional, defaults to a 0 literal. | +| Count Expression | An expression which evaluates to a non-negative integer or null (recommended type is `i64`). Declares the number of records that should be returned. An expression evaluating to null indicates that all records should be returned. | Optional, defaults to a null literal. | === "FetchRel Message"