diff --git a/site/docs/_config b/site/docs/_config index 68171c7db..e044eaa4e 100644 --- a/site/docs/_config +++ b/site/docs/_config @@ -1,14 +1,16 @@ -arrange: +nav: - index.md - - spec - - types - - expressions - - relations + - Introduction: about.md + - News & Articles: blog + - tutorial + - Format: + - spec + - types + - expressions + - relations - serialization - extensions - community - governance.md - - about.md - tools - - tutorial - - faq.md + - faq.md \ No newline at end of file diff --git a/site/docs/about.md b/site/docs/about.md index 802bec361..f26d76f3b 100644 --- a/site/docs/about.md +++ b/site/docs/about.md @@ -7,11 +7,11 @@ title: About Substrait ## Project Vision -The Substrait project aims to create a well-defined, cross-language [specification](spec/specification) for data compute operations. The specification declares a set of common operations, defines their semantics, and describes their behavior unambiguously. The project also defines extension points and serialized representations of the specification. +The Substrait project aims to create a well-defined, cross-language [specification](/spec/specification) for data compute operations. The specification declares a set of common operations, defines their semantics, and describes their behavior unambiguously. The project also defines extension points and serialized representations of the specification. In many ways, the goal of this project is similar to that of the Apache Arrow project. Arrow is focused on a standardized memory representation of columnar data. Substrait is focused on what should be done to data. - +See the [introductory tutorial](/tutorial/sql_to_substrait/) for a hands-on introduction to Substrait. ## Why not use SQL? 
diff --git a/site/docs/blog/2024-01-pytextformat.md b/site/docs/blog/2024-01-pytextformat.md new file mode 100644 index 000000000..3049958d0 --- /dev/null +++ b/site/docs/blog/2024-01-pytextformat.md @@ -0,0 +1,231 @@ +--- +title: Substrait Python 0.13 supports textual formats +description: Support for loading the text and JSON representations has been released in Substrait-Python 0.13 +date: 2024-02-20 +--- + +# Substrait Python and plan formats + +Up to now, the Substrait-Python library was only able to represent a Substrait plan in memory +and emit or load it from a protobuf binary representation. + +Version 0.13 finally introduces support for loading plans from more human-readable formats +like the Text Format and the JSON Format. +The Text Format makes it easier to load plans manually built by humans and provides an effective way +to debug plans, while the JSON Format acts as a bridge between the human and the machine, +providing a format that can be easily manipulated in all major programming languages and +shipped via text-based protocols like HTTP, while also being fairly readable for a human. 
+ +## Using the Text Format + +``` py +import tempfile +from substrait.planloader import planloader + +with tempfile.NamedTemporaryFile(mode="w+t") as tf: + tf.write(""" + + """) + testplan = planloader.load_substrait_plan(tf.name) +``` + +## Using JSON Format + +``` py +# SELECT count(exercise) AS exercise FROM crossfit WHERE difficulty_level <= 5 +plan = { + "extensions":[ + { + "extensionFunction":{ + "functionAnchor":1, + "name":"lte" + } + }, + { + "extensionFunction":{ + "functionAnchor":2, + "name":"is_not_null" + } + }, + { + "extensionFunction":{ + "functionAnchor":3, + "name":"and" + } + }, + { + "extensionFunction":{ + "functionAnchor":4, + "name":"count" + } + } + ], + "relations":[ + { + "root":{ + "input":{ + "project":{ + "input":{ + "aggregate":{ + "input":{ + "read":{ + "baseSchema":{ + "names":[ + "exercise", + "difficulty_level" + ], + "struct":{ + "types":[ + { + "varchar":{ + "length":13, + "nullability":"NULLABILITY_NULLABLE" + } + }, + { + "i32":{ + "nullability":"NULLABILITY_NULLABLE" + } + } + ], + "nullability":"NULLABILITY_REQUIRED" + } + }, + "filter":{ + "scalarFunction":{ + "functionReference":3, + "outputType":{ + "bool":{ + "nullability":"NULLABILITY_NULLABLE" + } + }, + "arguments":[ + { + "value":{ + "scalarFunction":{ + "functionReference":1, + "outputType":{ + "i32":{ + "nullability":"NULLABILITY_NULLABLE" + } + }, + "arguments":[ + { + "value":{ + "selection":{ + "directReference":{ + "structField":{ + "field":1 + } + }, + "rootReference":{ + + } + } + } + }, + { + "value":{ + "literal":{ + "i32":5 + } + } + } + ] + } + } + }, + { + "value":{ + "scalarFunction":{ + "functionReference":2, + "outputType":{ + "i32":{ + "nullability":"NULLABILITY_NULLABLE" + } + }, + "arguments":[ + { + "value":{ + "selection":{ + "directReference":{ + "structField":{ + "field":1 + } + }, + "rootReference":{ + + } + } + } + } + ] + } + } + } + ] + } + }, + "projection":{ + "select":{ + "structItems":[ + { + + } + ] + }, + 
"maintainSingularStruct":true + }, + "namedTable":{ + "names":[ + "crossfit" + ] + } + } + }, + "groupings":[ + { + + } + ], + "measures":[ + { + "measure":{ + "functionReference":4, + "outputType":{ + "i64":{ + "nullability":"NULLABILITY_NULLABLE" + } + } + } + } + ] + } + }, + "expressions":[ + { + "selection":{ + "directReference":{ + "structField":{ + + } + }, + "rootReference":{ + + } + } + } + ] + } + }, + "names":[ + "exercise" + ] + } + } + ], + "version":{ + "minorNumber":24, + } +} +``` \ No newline at end of file diff --git a/site/docs/blog/index.md b/site/docs/blog/index.md new file mode 100644 index 000000000..fdb0840c2 --- /dev/null +++ b/site/docs/blog/index.md @@ -0,0 +1,6 @@ +--- +exclude_from_blog: true +--- +# News & Articles + +{{ blog_content }} \ No newline at end of file diff --git a/site/docs/tools/_config b/site/docs/tools/_config index 5d6fdf1e8..b425a9209 100644 --- a/site/docs/tools/_config +++ b/site/docs/tools/_config @@ -1,4 +1,5 @@ arrange: - producer_tools.md + - libraries.md - substrait_validator.md - third_party_tools.md diff --git a/site/docs/tools/libraries.md b/site/docs/tools/libraries.md new file mode 100644 index 000000000..d8b4ed7a6 --- /dev/null +++ b/site/docs/tools/libraries.md @@ -0,0 +1,27 @@ +# Substrait Libraries + +## Python + +[Substrait-Python](https://github.com/substrait-io/substrait-python) is a Python library to build and manipulate Substrait plans + +## Java + +[Substrait-Java](https://github.com/substrait-io/substrait-java) is a Java library to build and manipulate Substrait plans, +it includes the Isthmus tool too which can convert SQL to Substrait. 
+ +## C++ + +[Substrait-Cpp](https://github.com/substrait-io/substrait-cpp) is a C++ library to build and manipulate Substrait plans; +it is the reference implementation and includes parsing for all official representation formats (text, protobuf, JSON). + +## JavaScript + +[Substrait-Js](https://github.com/substrait-io/substrait-js) is a JavaScript library to build and manipulate Substrait plans. + +## Rust + +[Substrait-rs](https://github.com/substrait-io/substrait-rs) is a Rust library to build and manipulate Substrait plans. + +## Go + +[Substrait-go](https://github.com/substrait-io/substrait-go) is a Go library to build and manipulate Substrait plans. diff --git a/site/mkdocs.yml b/site/mkdocs.yml index 3d0c2f199..34d6d6a5d 100644 --- a/site/mkdocs.yml +++ b/site/mkdocs.yml @@ -39,6 +39,10 @@ plugins: - table-reader - markdownextradata - search + - blogging: + paging: off + dirs: + - blog - awesome-pages: filename: _config - minify: diff --git a/site/requirements.txt b/site/requirements.txt index 4d069b3e1..4dbd0c9bb 100644 --- a/site/requirements.txt +++ b/site/requirements.txt @@ -8,6 +8,7 @@ mkdocs-gen-files>=0.4.0,<1 mkdocs-markdownextradata-plugin>=0.2.5,<1 mkdocs-protobuf>=0.1.0,<1 mkdocs-table-reader-plugin>=2,<3 +mkdocs-blogging-plugin>=2,<3 pygments>=2.14,<3 oyaml>=1.0,<2 mdutils>=1.4.0,<2