From 982a29ba55b7a7b573b9fa5a59043c4c77cd128e Mon Sep 17 00:00:00 2001
From: "Paul J. Davis"
Date: Sat, 7 Dec 2024 16:03:41 -0600
Subject: [PATCH 1/2] Reorganize the Parser module

I mostly did this as an exercise to get a general feel for how the Parser
implementation is organized. The basic approach: for every top level keyword
in Parser::parse_statement I created a new module and moved the corresponding
function to that module. Then I spent a few hours checking `find references`,
and any method that was only referenced from a single new module got moved
into that module as well. Towards the end I had to decide where functions
referenced from multiple modules should live. Some of those placements seemed
obvious, while others were certainly arbitrary.

Most of the motivation here was that working on a 13,000 line file was making
my editor very unhappy. After this change, the largest module is
src/parser/select.rs, which clocks in at 2142 lines.

I should note that the only visible changes are, hopefully, a few functions
whose visibility flipped from private to public because I forgot about
pub(crate) when I first started. Other than that, this is purely a copy/paste
move of code into new module files.

---
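
A note for reviewers on the mechanism this relies on: Rust lets a type
accumulate `impl` blocks from any number of modules within the same crate, so
moving a method into a new file changes nothing at its call sites. A minimal,
self-contained sketch of the pattern (invented names, not code from this
patch):

    // Sketch only: in the real change each module below is a separate
    // file under src/parser/, declared in src/parser/mod.rs.
    pub struct Parser {
        pos: usize,
    }

    #[derive(Debug)]
    pub struct ParserError(pub String);

    #[derive(Debug)]
    pub enum Statement {
        Analyze,
        Drop,
    }

    mod analyze {
        use super::{Parser, ParserError, Statement};

        impl Parser {
            // pub(crate): callable from sibling modules (like the
            // dispatcher below) without entering the public API.
            pub(crate) fn parse_analyze(&mut self) -> Result<Statement, ParserError> {
                self.pos += 1; // child modules can touch the parent's private fields
                Ok(Statement::Analyze)
            }
        }
    }

    mod drop {
        use super::{Parser, ParserError, Statement};

        impl Parser {
            pub(crate) fn parse_drop(&mut self) -> Result<Statement, ParserError> {
                self.pos += 1;
                Ok(Statement::Drop)
            }
        }
    }

    impl Parser {
        // The dispatcher is untouched by the reorganization: it calls
        // plain methods on Parser, wherever their impl block now lives.
        pub fn parse_statement(&mut self, keyword: &str) -> Result<Statement, ParserError> {
            match keyword {
                "ANALYZE" => self.parse_analyze(),
                "DROP" => self.parse_drop(),
                other => Err(ParserError(format!("unexpected keyword: {other}"))),
            }
        }
    }

This is also why some helpers need pub(crate) after the move: a private
method stops being visible once its callers live in sibling modules, while
pub(crate) restores crate-wide visibility without leaking the method into the
library's public API.
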
 src/parser/alter.rs | 652 +-
 src/parser/analyze.rs | 59 +
 src/parser/assert.rs | 14 +
 src/parser/assignment.rs | 23 +
 src/parser/attach.rs | 15 +
 src/parser/cache.rs | 93 +
 src/parser/call.rs | 82 +
 src/parser/close.rs | 15 +
 src/parser/columns.rs | 433 +
 src/parser/comment.rs | 48 +
 src/parser/commit.rs | 9 +
 src/parser/copy.rs | 224 +
 src/parser/create.rs | 1669 ++++
 src/parser/deallocate.rs | 9 +
 src/parser/declare.rs | 76 +
 src/parser/delete.rs | 60 +
 src/parser/dialects/bigquery.rs | 135 +
 src/parser/dialects/clickhouse.rs | 50 +
 src/parser/dialects/duckdb.rs | 226 +
 src/parser/dialects/hive.rs | 192 +
 src/parser/dialects/mod.rs | 9 +
 src/parser/dialects/mssql.rs | 242 +
 src/parser/dialects/postgresql.rs | 280 +
 src/parser/dialects/snowflake.rs | 145 +
 src/parser/dialects/sqlite.rs | 23 +
 src/parser/dialects/utils.rs | 19 +
 src/parser/discard.rs | 21 +
 src/parser/drop.rs | 189 +
 src/parser/end.rs | 9 +
 src/parser/execute.rs | 38 +
 src/parser/explain.rs | 92 +
 src/parser/expr.rs | 2115 +++
 src/parser/fetch.rs | 64 +
 src/parser/flush.rs | 92 +
 src/parser/grant.rs | 157 +
 src/parser/identifier.rs | 278 +
 src/parser/insert.rs | 200 +
 src/parser/install.rs | 10 +
 src/parser/keyword.rs | 22 +
 src/parser/kill.rs | 29 +
 src/parser/listen.rs | 8 +
 src/parser/lists.rs | 100 +
 src/parser/load.rs | 50 +
 src/parser/merge.rs | 123 +
 src/parser/mod.rs | 13183 +---------------------------
 src/parser/msck.rs | 30 +
 src/parser/notify.rs | 13 +
 src/parser/optimize.rs | 42 +
 src/parser/options.rs | 128 +
 src/parser/pragma.rs | 42 +
 src/parser/prepare.rs | 21 +
 src/parser/release.rs | 10 +
 src/parser/replace.rs | 22 +
 src/parser/revoke.rs | 32 +
 src/parser/rollback.rs | 32 +
 src/parser/savepoint.rs | 8 +
 src/parser/select.rs | 2142 +++
 src/parser/set.rs | 126 +
 src/parser/show.rs | 299 +
 src/parser/start.rs | 74 +
 src/parser/tests.rs | 695 ++
 src/parser/tokens.rs | 359 +
 src/parser/truncate.rs | 54 +
 src/parser/uncache.rs | 14 +
 src/parser/unlisten.rs | 18 +
 src/parser/unload.rs | 20 +
 src/parser/update.rs | 35 +
 src/parser/use.rs | 77 +
 src/parser/value.rs | 756 ++
 src/parser/window.rs | 105 +
 src/tokenizer.rs | 10 +-
 71 files changed, 13519 insertions(+), 13227 deletions(-)
 create mode 100644 src/parser/analyze.rs
 create mode 100644 src/parser/assert.rs
 create mode 100644 src/parser/assignment.rs
 create mode 100644 src/parser/attach.rs
 create mode 100644 src/parser/cache.rs
 create mode 100644 src/parser/call.rs
 create mode 100644 src/parser/close.rs
 create mode 100644 src/parser/columns.rs
 create mode 100644 src/parser/comment.rs
 create mode 100644 src/parser/commit.rs
 create mode 100644 src/parser/copy.rs
 create mode 100644 src/parser/create.rs
 create mode 100644 src/parser/deallocate.rs
 create mode 100644 src/parser/declare.rs
 create mode 100644 src/parser/delete.rs
 create mode 100644 src/parser/dialects/bigquery.rs
 create mode 100644 src/parser/dialects/clickhouse.rs
 create mode 100644 src/parser/dialects/duckdb.rs
 create mode 100644 src/parser/dialects/hive.rs
 create mode 100644 src/parser/dialects/mod.rs
 create mode 100644 src/parser/dialects/mssql.rs
 create mode 100644 src/parser/dialects/postgresql.rs
 create mode 100644 src/parser/dialects/snowflake.rs
 create mode 100644 src/parser/dialects/sqlite.rs
 create mode 100644 src/parser/dialects/utils.rs
 create mode 100644 src/parser/discard.rs
 create mode 100644 src/parser/drop.rs
 create mode 100644 src/parser/end.rs
 create mode 100644 src/parser/execute.rs
 create mode 100644 src/parser/explain.rs
 create mode 100644 src/parser/expr.rs
 create mode 100644 src/parser/fetch.rs
 create mode 100644 src/parser/flush.rs
 create mode 100644 src/parser/grant.rs
 create mode 100644 src/parser/identifier.rs
 create mode 100644 src/parser/insert.rs
 create mode 100644 src/parser/install.rs
 create mode 100644 src/parser/keyword.rs
 create mode 100644 src/parser/kill.rs
 create mode 100644 src/parser/listen.rs
 create mode 100644 src/parser/lists.rs
 create mode 100644 src/parser/load.rs
 create mode 100644 src/parser/merge.rs
 create mode 100644 src/parser/msck.rs
 create mode 100644 src/parser/notify.rs
 create mode 100644 src/parser/optimize.rs
 create mode 100644 src/parser/options.rs
 create mode 100644 src/parser/pragma.rs
 create mode 100644 src/parser/prepare.rs
 create mode 100644 src/parser/release.rs
 create mode 100644 src/parser/replace.rs
 create mode 100644 src/parser/revoke.rs
 create mode 100644 src/parser/rollback.rs
 create mode 100644 src/parser/savepoint.rs
 create mode 100644 src/parser/select.rs
 create mode 100644 src/parser/set.rs
 create mode 100644 src/parser/show.rs
 create mode 100644 src/parser/start.rs
 create mode 100644 src/parser/tests.rs
 create mode 100644 src/parser/tokens.rs
 create mode 100644 src/parser/truncate.rs
 create mode 100644 src/parser/uncache.rs
 create mode 100644 src/parser/unlisten.rs
 create mode 100644 src/parser/unload.rs
 create mode 100644 src/parser/update.rs
 create mode 100644 src/parser/use.rs
 create mode 100644 src/parser/value.rs
 create mode 100644 src/parser/window.rs

diff --git a/src/parser/alter.rs b/src/parser/alter.rs
index 3ac4ab0c7..cf6aaae9c 100644
--- a/src/parser/alter.rs
+++ b/src/parser/alter.rs
@@ -15,28 +15,71 @@
 #[cfg(not(feature = "std"))]
 use alloc::vec;
 
-use super::{Parser, ParserError};
-use crate::{
-    ast::{
-        AlterPolicyOperation, AlterRoleOperation, Expr, Password, ResetConfig, RoleOption,
-        SetConfigValue, Statement,
-    },
-    dialect::{MsSqlDialect, PostgreSqlDialect},
-    keywords::Keyword,
-    tokenizer::Token,
-};
-
-impl Parser<'_> {
-    pub fn parse_alter_role(&mut self) -> Result<Statement, ParserError> {
-        if dialect_of!(self is PostgreSqlDialect) {
-            return self.parse_pg_alter_role();
-        } else if dialect_of!(self is MsSqlDialect) {
-            return self.parse_mssql_alter_role();
-        }
+use crate::parser::*;
 
-        Err(ParserError::ParserError(
-            "ALTER ROLE is only support for PostgreSqlDialect, MsSqlDialect".into(),
-        ))
+impl<'a>
Parser<'a> { + pub fn parse_alter(&mut self) -> Result { + let object_type = self.expect_one_of_keywords(&[ + Keyword::VIEW, + Keyword::TABLE, + Keyword::INDEX, + Keyword::ROLE, + Keyword::POLICY, + ])?; + match object_type { + Keyword::VIEW => self.parse_alter_view(), + Keyword::TABLE => { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] + let table_name = self.parse_object_name(false)?; + let on_cluster = self.parse_optional_on_cluster()?; + let operations = self.parse_comma_separated(Parser::parse_alter_table_operation)?; + + let mut location = None; + if self.parse_keyword(Keyword::LOCATION) { + location = Some(HiveSetLocation { + has_set: false, + location: self.parse_identifier(false)?, + }); + } else if self.parse_keywords(&[Keyword::SET, Keyword::LOCATION]) { + location = Some(HiveSetLocation { + has_set: true, + location: self.parse_identifier(false)?, + }); + } + + Ok(Statement::AlterTable { + name: table_name, + if_exists, + only, + operations, + location, + on_cluster, + }) + } + Keyword::INDEX => { + let index_name = self.parse_object_name(false)?; + let operation = if self.parse_keyword(Keyword::RENAME) { + if self.parse_keyword(Keyword::TO) { + let index_name = self.parse_object_name(false)?; + AlterIndexOperation::RenameIndex { index_name } + } else { + return self.expected("TO after RENAME", self.peek_token()); + } + } else { + return self.expected("RENAME after ALTER INDEX", self.peek_token()); + }; + + Ok(Statement::AlterIndex { + name: index_name, + operation, + }) + } + Keyword::ROLE => self.parse_alter_role(), + Keyword::POLICY => self.parse_alter_policy(), + // unreachable because expect_one_of_keywords used above + _ => unreachable!(), + } } /// Parse ALTER POLICY statement @@ -99,169 +142,462 @@ impl Parser<'_> { } } - fn parse_mssql_alter_role(&mut self) -> Result { - let role_name = self.parse_identifier(false)?; - - let operation = if self.parse_keywords(&[Keyword::ADD, Keyword::MEMBER]) { - let member_name = self.parse_identifier(false)?; - AlterRoleOperation::AddMember { member_name } - } else if self.parse_keywords(&[Keyword::DROP, Keyword::MEMBER]) { - let member_name = self.parse_identifier(false)?; - AlterRoleOperation::DropMember { member_name } - } else if self.parse_keywords(&[Keyword::WITH, Keyword::NAME]) { - if self.consume_token(&Token::Eq) { - let role_name = self.parse_identifier(false)?; - AlterRoleOperation::RenameRole { role_name } - } else { - return self.expected("= after WITH NAME ", self.peek_token()); - } - } else { - return self.expected("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token()); - }; + pub fn parse_alter_role(&mut self) -> Result { + if dialect_of!(self is PostgreSqlDialect) { + return self.parse_pg_alter_role(); + } else if dialect_of!(self is MsSqlDialect) { + return self.parse_mssql_alter_role(); + } - Ok(Statement::AlterRole { - name: role_name, - operation, - }) + Err(ParserError::ParserError( + "ALTER ROLE is only support for PostgreSqlDialect, MsSqlDialect".into(), + )) } - fn parse_pg_alter_role(&mut self) -> Result { - let role_name = self.parse_identifier(false)?; - - // [ IN DATABASE _`database_name`_ ] - let in_database = if self.parse_keywords(&[Keyword::IN, Keyword::DATABASE]) { - self.parse_object_name(false).ok() - } else { - None - }; + pub fn parse_alter_table_add_projection(&mut self) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = 
self.parse_identifier(false)?; + let query = self.parse_projection_select()?; + Ok(AlterTableOperation::AddProjection { + if_not_exists, + name, + select: query, + }) + } - let operation = if self.parse_keyword(Keyword::RENAME) { - if self.parse_keyword(Keyword::TO) { - let role_name = self.parse_identifier(false)?; - AlterRoleOperation::RenameRole { role_name } + pub fn parse_alter_table_operation(&mut self) -> Result { + let operation = if self.parse_keyword(Keyword::ADD) { + if let Some(constraint) = self.parse_optional_table_constraint()? { + AlterTableOperation::AddConstraint(constraint) + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::PROJECTION) + { + return self.parse_alter_table_add_projection(); } else { - return self.expected("TO after RENAME", self.peek_token()); - } - // SET - } else if self.parse_keyword(Keyword::SET) { - let config_name = self.parse_object_name(false)?; - // FROM CURRENT - if self.parse_keywords(&[Keyword::FROM, Keyword::CURRENT]) { - AlterRoleOperation::Set { - config_name, - config_value: SetConfigValue::FromCurrent, - in_database, - } - // { TO | = } { value | DEFAULT } - } else if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - if self.parse_keyword(Keyword::DEFAULT) { - AlterRoleOperation::Set { - config_name, - config_value: SetConfigValue::Default, - in_database, + let if_not_exists = + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let mut new_partitions = vec![]; + loop { + if self.parse_keyword(Keyword::PARTITION) { + new_partitions.push(self.parse_partition()?); + } else { + break; } - } else if let Ok(expr) = self.parse_expr() { - AlterRoleOperation::Set { - config_name, - config_value: SetConfigValue::Value(expr), - in_database, + } + if !new_partitions.is_empty() { + AlterTableOperation::AddPartitions { + if_not_exists, + new_partitions, } } else { - self.expected("config value", self.peek_token())? + let column_keyword = self.parse_keyword(Keyword::COLUMN); + + let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) + { + self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) + || if_not_exists + } else { + false + }; + + let column_def = self.parse_column_def()?; + + let column_position = self.parse_column_position()?; + + AlterTableOperation::AddColumn { + column_keyword, + if_not_exists, + column_def, + column_position, + } } + } + } else if self.parse_keyword(Keyword::RENAME) { + if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::CONSTRAINT) { + let old_name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::TO)?; + let new_name = self.parse_identifier(false)?; + AlterTableOperation::RenameConstraint { old_name, new_name } + } else if self.parse_keyword(Keyword::TO) { + let table_name = self.parse_object_name(false)?; + AlterTableOperation::RenameTable { table_name } } else { - self.expected("'TO' or '=' or 'FROM CURRENT'", self.peek_token())? 
+ let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let old_column_name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::TO)?; + let new_column_name = self.parse_identifier(false)?; + AlterTableOperation::RenameColumn { + old_column_name, + new_column_name, + } + } + } else if self.parse_keyword(Keyword::DISABLE) { + if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { + AlterTableOperation::DisableRowLevelSecurity {} + } else if self.parse_keyword(Keyword::RULE) { + let name = self.parse_identifier(false)?; + AlterTableOperation::DisableRule { name } + } else if self.parse_keyword(Keyword::TRIGGER) { + let name = self.parse_identifier(false)?; + AlterTableOperation::DisableTrigger { name } + } else { + return self.expected( + "ROW LEVEL SECURITY, RULE, or TRIGGER after DISABLE", + self.peek_token(), + ); + } + } else if self.parse_keyword(Keyword::ENABLE) { + if self.parse_keywords(&[Keyword::ALWAYS, Keyword::RULE]) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableAlwaysRule { name } + } else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::TRIGGER]) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableAlwaysTrigger { name } + } else if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { + AlterTableOperation::EnableRowLevelSecurity {} + } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::RULE]) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableReplicaRule { name } + } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::TRIGGER]) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableReplicaTrigger { name } + } else if self.parse_keyword(Keyword::RULE) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableRule { name } + } else if self.parse_keyword(Keyword::TRIGGER) { + let name = self.parse_identifier(false)?; + AlterTableOperation::EnableTrigger { name } + } else { + return self.expected( + "ALWAYS, REPLICA, ROW LEVEL SECURITY, RULE, or TRIGGER after ENABLE", + self.peek_token(), + ); + } + } else if self.parse_keywords(&[Keyword::CLEAR, Keyword::PROJECTION]) + && dialect_of!(self is ClickHouseDialect|GenericDialect) + { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { + Some(self.parse_identifier(false)?) + } else { + None + }; + AlterTableOperation::ClearProjection { + if_exists, + name, + partition, + } + } else if self.parse_keywords(&[Keyword::MATERIALIZE, Keyword::PROJECTION]) + && dialect_of!(self is ClickHouseDialect|GenericDialect) + { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { + Some(self.parse_identifier(false)?) 
+ } else { + None + }; + AlterTableOperation::MaterializeProjection { + if_exists, + name, + partition, } - // RESET - } else if self.parse_keyword(Keyword::RESET) { - if self.parse_keyword(Keyword::ALL) { - AlterRoleOperation::Reset { - config_name: ResetConfig::ALL, - in_database, + } else if self.parse_keyword(Keyword::DROP) { + if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: true, + } + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::DropPartitions { + partitions, + if_exists: false, + } + } else if self.parse_keyword(Keyword::CONSTRAINT) { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropConstraint { + if_exists, + name, + cascade, } + } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + AlterTableOperation::DropPrimaryKey + } else if self.parse_keyword(Keyword::PROJECTION) + && dialect_of!(self is ClickHouseDialect|GenericDialect) + { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + AlterTableOperation::DropProjection { if_exists, name } + } else if self.parse_keywords(&[Keyword::CLUSTERING, Keyword::KEY]) { + AlterTableOperation::DropClusteringKey } else { - let config_name = self.parse_object_name(false)?; - AlterRoleOperation::Reset { - config_name: ResetConfig::ConfigName(config_name), - in_database, + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let column_name = self.parse_identifier(false)?; + let cascade = self.parse_keyword(Keyword::CASCADE); + AlterTableOperation::DropColumn { + column_name, + if_exists, + cascade, } } - // option - } else { - // [ WITH ] - let _ = self.parse_keyword(Keyword::WITH); - // option - let mut options = vec![]; - while let Some(opt) = self.maybe_parse(|parser| parser.parse_pg_role_option())? { - options.push(opt); + } else if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + let before = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::RENAME)?; + self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; + self.expect_token(&Token::LParen)?; + let renames = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::RenamePartitions { + old_partitions: before, + new_partitions: renames, } - // check option - if options.is_empty() { - return self.expected("option", self.peek_token())?; + } else if self.parse_keyword(Keyword::CHANGE) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let old_name = self.parse_identifier(false)?; + let new_name = self.parse_identifier(false)?; + let data_type = self.parse_data_type()?; + let mut options = vec![]; + while let Some(option) = self.parse_optional_column_option()? 
{ + options.push(option); } - AlterRoleOperation::WithOptions { options } - }; + let column_position = self.parse_column_position()?; - Ok(Statement::AlterRole { - name: role_name, - operation, - }) - } + AlterTableOperation::ChangeColumn { + old_name, + new_name, + data_type, + options, + column_position, + } + } else if self.parse_keyword(Keyword::MODIFY) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let col_name = self.parse_identifier(false)?; + let data_type = self.parse_data_type()?; + let mut options = vec![]; + while let Some(option) = self.parse_optional_column_option()? { + options.push(option); + } - fn parse_pg_role_option(&mut self) -> Result { - let option = match self.parse_one_of_keywords(&[ - Keyword::BYPASSRLS, - Keyword::NOBYPASSRLS, - Keyword::CONNECTION, - Keyword::CREATEDB, - Keyword::NOCREATEDB, - Keyword::CREATEROLE, - Keyword::NOCREATEROLE, - Keyword::INHERIT, - Keyword::NOINHERIT, - Keyword::LOGIN, - Keyword::NOLOGIN, - Keyword::PASSWORD, - Keyword::REPLICATION, - Keyword::NOREPLICATION, - Keyword::SUPERUSER, - Keyword::NOSUPERUSER, - Keyword::VALID, - ]) { - Some(Keyword::BYPASSRLS) => RoleOption::BypassRLS(true), - Some(Keyword::NOBYPASSRLS) => RoleOption::BypassRLS(false), - Some(Keyword::CONNECTION) => { - self.expect_keyword(Keyword::LIMIT)?; - RoleOption::ConnectionLimit(Expr::Value(self.parse_number_value()?)) + let column_position = self.parse_column_position()?; + + AlterTableOperation::ModifyColumn { + col_name, + data_type, + options, + column_position, } - Some(Keyword::CREATEDB) => RoleOption::CreateDB(true), - Some(Keyword::NOCREATEDB) => RoleOption::CreateDB(false), - Some(Keyword::CREATEROLE) => RoleOption::CreateRole(true), - Some(Keyword::NOCREATEROLE) => RoleOption::CreateRole(false), - Some(Keyword::INHERIT) => RoleOption::Inherit(true), - Some(Keyword::NOINHERIT) => RoleOption::Inherit(false), - Some(Keyword::LOGIN) => RoleOption::Login(true), - Some(Keyword::NOLOGIN) => RoleOption::Login(false), - Some(Keyword::PASSWORD) => { - let password = if self.parse_keyword(Keyword::NULL) { - Password::NullPassword + } else if self.parse_keyword(Keyword::ALTER) { + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] + let column_name = self.parse_identifier(false)?; + let is_postgresql = dialect_of!(self is PostgreSqlDialect); + + let op: AlterColumnOperation = if self.parse_keywords(&[ + Keyword::SET, + Keyword::NOT, + Keyword::NULL, + ]) { + AlterColumnOperation::SetNotNull {} + } else if self.parse_keywords(&[Keyword::DROP, Keyword::NOT, Keyword::NULL]) { + AlterColumnOperation::DropNotNull {} + } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { + AlterColumnOperation::SetDefault { + value: self.parse_expr()?, + } + } else if self.parse_keywords(&[Keyword::DROP, Keyword::DEFAULT]) { + AlterColumnOperation::DropDefault {} + } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) + || (is_postgresql && self.parse_keyword(Keyword::TYPE)) + { + let data_type = self.parse_data_type()?; + let using = if is_postgresql && self.parse_keyword(Keyword::USING) { + Some(self.parse_expr()?) 
+ } else { + None + }; + AlterColumnOperation::SetDataType { data_type, using } + } else if self.parse_keywords(&[Keyword::ADD, Keyword::GENERATED]) { + let generated_as = if self.parse_keyword(Keyword::ALWAYS) { + Some(GeneratedAs::Always) + } else if self.parse_keywords(&[Keyword::BY, Keyword::DEFAULT]) { + Some(GeneratedAs::ByDefault) + } else { + None + }; + + self.expect_keywords(&[Keyword::AS, Keyword::IDENTITY])?; + + let mut sequence_options: Option> = None; + + if self.peek_token().token == Token::LParen { + self.expect_token(&Token::LParen)?; + sequence_options = Some(self.parse_create_sequence_options()?); + self.expect_token(&Token::RParen)?; + } + + AlterColumnOperation::AddGenerated { + generated_as, + sequence_options, + } + } else { + let message = if is_postgresql { + "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE, or ADD GENERATED after ALTER COLUMN" } else { - Password::Password(Expr::Value(self.parse_value()?)) + "SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN" }; - RoleOption::Password(password) + + return self.expected(message, self.peek_token()); + }; + AlterTableOperation::AlterColumn { column_name, op } + } else if self.parse_keyword(Keyword::SWAP) { + self.expect_keyword(Keyword::WITH)?; + let table_name = self.parse_object_name(false)?; + AlterTableOperation::SwapWith { table_name } + } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) + { + let new_owner = self.parse_owner()?; + AlterTableOperation::OwnerTo { new_owner } + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::ATTACH) + { + AlterTableOperation::AttachPartition { + partition: self.parse_part_or_partition()?, } - Some(Keyword::REPLICATION) => RoleOption::Replication(true), - Some(Keyword::NOREPLICATION) => RoleOption::Replication(false), - Some(Keyword::SUPERUSER) => RoleOption::SuperUser(true), - Some(Keyword::NOSUPERUSER) => RoleOption::SuperUser(false), - Some(Keyword::VALID) => { - self.expect_keyword(Keyword::UNTIL)?; - RoleOption::ValidUntil(Expr::Value(self.parse_value()?)) + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::DETACH) + { + AlterTableOperation::DetachPartition { + partition: self.parse_part_or_partition()?, + } + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::FREEZE) + { + let partition = self.parse_part_or_partition()?; + let with_name = if self.parse_keyword(Keyword::WITH) { + self.expect_keyword(Keyword::NAME)?; + Some(self.parse_identifier(false)?) + } else { + None + }; + AlterTableOperation::FreezePartition { + partition, + with_name, + } + } else if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::UNFREEZE) + { + let partition = self.parse_part_or_partition()?; + let with_name = if self.parse_keyword(Keyword::WITH) { + self.expect_keyword(Keyword::NAME)?; + Some(self.parse_identifier(false)?) 
+ } else { + None + }; + AlterTableOperation::UnfreezePartition { + partition, + with_name, + } + } else if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let exprs = self.parse_comma_separated(|parser| parser.parse_expr())?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::ClusterBy { exprs } + } else if self.parse_keywords(&[Keyword::SUSPEND, Keyword::RECLUSTER]) { + AlterTableOperation::SuspendRecluster + } else if self.parse_keywords(&[Keyword::RESUME, Keyword::RECLUSTER]) { + AlterTableOperation::ResumeRecluster + } else { + let options: Vec = + self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; + if !options.is_empty() { + AlterTableOperation::SetTblProperties { + table_properties: options, + } + } else { + return self.expected( + "ADD, RENAME, PARTITION, SWAP, DROP, or SET TBLPROPERTIES after ALTER TABLE", + self.peek_token(), + ); } - _ => self.expected("option", self.peek_token())?, }; + Ok(operation) + } + + pub fn parse_alter_view(&mut self) -> Result { + let name = self.parse_object_name(false)?; + let columns = self.parse_parenthesized_column_list(Optional, false)?; + + let with_options = self.parse_options(Keyword::WITH)?; + + self.expect_keyword(Keyword::AS)?; + let query = self.parse_query()?; - Ok(option) + Ok(Statement::AlterView { + name, + columns, + query, + with_options, + }) + } + + pub fn parse_partition(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let partitions = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Partition::Partitions(partitions)) + } + + pub fn parse_projection_select(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + self.expect_keyword(Keyword::SELECT)?; + let projection = self.parse_projection()?; + let group_by = self.parse_optional_group_by()?; + let order_by = self.parse_optional_order_by()?; + self.expect_token(&Token::RParen)?; + Ok(ProjectionSelect { + projection, + group_by, + order_by, + }) + } + + fn parse_column_position(&mut self) -> Result, ParserError> { + if dialect_of!(self is MySqlDialect | GenericDialect) { + if self.parse_keyword(Keyword::FIRST) { + Ok(Some(MySQLColumnPosition::First)) + } else if self.parse_keyword(Keyword::AFTER) { + let ident = self.parse_identifier(false)?; + Ok(Some(MySQLColumnPosition::After(ident))) + } else { + Ok(None) + } + } else { + Ok(None) + } + } + + fn parse_part_or_partition(&mut self) -> Result { + let keyword = self.expect_one_of_keywords(&[Keyword::PART, Keyword::PARTITION])?; + match keyword { + Keyword::PART => Ok(Partition::Part(self.parse_expr()?)), + Keyword::PARTITION => Ok(Partition::Expr(self.parse_expr()?)), + // unreachable because expect_one_of_keywords used above + _ => unreachable!(), + } } } diff --git a/src/parser/analyze.rs b/src/parser/analyze.rs new file mode 100644 index 000000000..239a0d43d --- /dev/null +++ b/src/parser/analyze.rs @@ -0,0 +1,59 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_analyze(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name(false)?; + let mut for_columns = false; + let mut cache_metadata = false; + let mut noscan = false; + let mut partitions = None; + let mut compute_statistics = false; + let mut columns = vec![]; + loop { + match self.parse_one_of_keywords(&[ + Keyword::PARTITION, + Keyword::FOR, + Keyword::CACHE, + Keyword::NOSCAN, + Keyword::COMPUTE, + ]) { + Some(Keyword::PARTITION) => { + 
self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + Some(Keyword::NOSCAN) => noscan = true, + Some(Keyword::FOR) => { + self.expect_keyword(Keyword::COLUMNS)?; + + columns = self + .maybe_parse(|parser| { + parser.parse_comma_separated(|p| p.parse_identifier(false)) + })? + .unwrap_or_default(); + for_columns = true + } + Some(Keyword::CACHE) => { + self.expect_keyword(Keyword::METADATA)?; + cache_metadata = true + } + Some(Keyword::COMPUTE) => { + self.expect_keyword(Keyword::STATISTICS)?; + compute_statistics = true + } + _ => break, + } + } + + Ok(Statement::Analyze { + table_name, + for_columns, + columns, + partitions, + cache_metadata, + noscan, + compute_statistics, + }) + } +} diff --git a/src/parser/assert.rs b/src/parser/assert.rs new file mode 100644 index 000000000..6dc4aaee6 --- /dev/null +++ b/src/parser/assert.rs @@ -0,0 +1,14 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_assert(&mut self) -> Result { + let condition = self.parse_expr()?; + let message = if self.parse_keyword(Keyword::AS) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(Statement::Assert { condition, message }) + } +} diff --git a/src/parser/assignment.rs b/src/parser/assignment.rs new file mode 100644 index 000000000..3467951e2 --- /dev/null +++ b/src/parser/assignment.rs @@ -0,0 +1,23 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse a `var = expr` assignment, used in an UPDATE statement + pub fn parse_assignment(&mut self) -> Result { + let target = self.parse_assignment_target()?; + self.expect_token(&Token::Eq)?; + let value = self.parse_expr()?; + Ok(Assignment { target, value }) + } + + /// Parse the left-hand side of an assignment, used in an UPDATE statement + pub fn parse_assignment_target(&mut self) -> Result { + if self.consume_token(&Token::LParen) { + let columns = self.parse_comma_separated(|p| p.parse_object_name(false))?; + self.expect_token(&Token::RParen)?; + Ok(AssignmentTarget::Tuple(columns)) + } else { + let column = self.parse_object_name(false)?; + Ok(AssignmentTarget::ColumnName(column)) + } + } +} diff --git a/src/parser/attach.rs b/src/parser/attach.rs new file mode 100644 index 000000000..b2f437833 --- /dev/null +++ b/src/parser/attach.rs @@ -0,0 +1,15 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_attach_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let database_file_name = self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let schema_name = self.parse_identifier(false)?; + Ok(Statement::AttachDatabase { + database, + schema_name, + database_file_name, + }) + } +} diff --git a/src/parser/cache.rs b/src/parser/cache.rs new file mode 100644 index 000000000..3d5c388d5 --- /dev/null +++ b/src/parser/cache.rs @@ -0,0 +1,93 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse a CACHE TABLE statement + pub fn parse_cache_table(&mut self) -> Result { + let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None); + if self.parse_keyword(Keyword::TABLE) { + let table_name = self.parse_object_name(false)?; + if self.peek_token().token != Token::EOF { + if let Token::Word(word) = self.peek_token().token { + if word.keyword == Keyword::OPTIONS { + options = self.parse_options(Keyword::OPTIONS)? 
+ } + }; + + if self.peek_token().token != Token::EOF { + let (a, q) = self.parse_as_query()?; + has_as = a; + query = Some(q); + } + + Ok(Statement::Cache { + table_flag, + table_name, + has_as, + options, + query, + }) + } else { + Ok(Statement::Cache { + table_flag, + table_name, + has_as, + options, + query, + }) + } + } else { + table_flag = Some(self.parse_object_name(false)?); + if self.parse_keyword(Keyword::TABLE) { + let table_name = self.parse_object_name(false)?; + if self.peek_token() != Token::EOF { + if let Token::Word(word) = self.peek_token().token { + if word.keyword == Keyword::OPTIONS { + options = self.parse_options(Keyword::OPTIONS)? + } + }; + + if self.peek_token() != Token::EOF { + let (a, q) = self.parse_as_query()?; + has_as = a; + query = Some(q); + } + + Ok(Statement::Cache { + table_flag, + table_name, + has_as, + options, + query, + }) + } else { + Ok(Statement::Cache { + table_flag, + table_name, + has_as, + options, + query, + }) + } + } else { + if self.peek_token() == Token::EOF { + self.prev_token(); + } + self.expected("a `TABLE` keyword", self.peek_token()) + } + } + } + + /// Parse 'AS' before as query,such as `WITH XXX AS SELECT XXX` oer `CACHE TABLE AS SELECT XXX` + pub fn parse_as_query(&mut self) -> Result<(bool, Box), ParserError> { + match self.peek_token().token { + Token::Word(word) => match word.keyword { + Keyword::AS => { + self.next_token(); + Ok((true, self.parse_query()?)) + } + _ => Ok((false, self.parse_query()?)), + }, + _ => self.expected("a QUERY statement", self.peek_token()), + } + } +} diff --git a/src/parser/call.rs b/src/parser/call.rs new file mode 100644 index 000000000..3e0ae7a4e --- /dev/null +++ b/src/parser/call.rs @@ -0,0 +1,82 @@ +use crate::parser::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + /// Parse a `CALL procedure_name(arg1, arg2, ...)` + /// or `CALL procedure_name` statement + pub fn parse_call(&mut self) -> Result { + let object_name = self.parse_object_name(false)?; + if self.peek_token().token == Token::LParen { + match self.parse_function(object_name)? 
{ + Expr::Function(f) => Ok(Statement::Call(f)), + other => parser_err!( + format!("Expected a simple procedure call but found: {other}"), + self.peek_token().span.start + ), + } + } else { + Ok(Statement::Call(Function { + name: object_name, + parameters: FunctionArguments::None, + args: FunctionArguments::None, + over: None, + filter: None, + null_treatment: None, + within_group: vec![], + })) + } + } + + pub fn parse_function_desc(&mut self) -> Result { + let name = self.parse_object_name(false)?; + + let args = if self.consume_token(&Token::LParen) { + if self.consume_token(&Token::RParen) { + None + } else { + let args = self.parse_comma_separated(Parser::parse_function_arg)?; + self.expect_token(&Token::RParen)?; + Some(args) + } + } else { + None + }; + + Ok(FunctionDesc { name, args }) + } + + pub(crate) fn parse_function_arg(&mut self) -> Result { + let mode = if self.parse_keyword(Keyword::IN) { + Some(ArgMode::In) + } else if self.parse_keyword(Keyword::OUT) { + Some(ArgMode::Out) + } else if self.parse_keyword(Keyword::INOUT) { + Some(ArgMode::InOut) + } else { + None + }; + + // parse: [ argname ] argtype + let mut name = None; + let mut data_type = self.parse_data_type()?; + if let DataType::Custom(n, _) = &data_type { + // the first token is actually a name + name = Some(n.0[0].clone()); + data_type = self.parse_data_type()?; + } + + let default_expr = if self.parse_keyword(Keyword::DEFAULT) || self.consume_token(&Token::Eq) + { + Some(self.parse_expr()?) + } else { + None + }; + Ok(OperateFunctionArg { + mode, + name, + data_type, + default_expr, + }) + } +} diff --git a/src/parser/close.rs b/src/parser/close.rs new file mode 100644 index 000000000..dd6eef6ee --- /dev/null +++ b/src/parser/close.rs @@ -0,0 +1,15 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_close(&mut self) -> Result { + let cursor = if self.parse_keyword(Keyword::ALL) { + CloseCursor::All + } else { + let name = self.parse_identifier(false)?; + + CloseCursor::Specific { name } + }; + + Ok(Statement::Close { cursor }) + } +} diff --git a/src/parser/columns.rs b/src/parser/columns.rs new file mode 100644 index 000000000..012c84fd1 --- /dev/null +++ b/src/parser/columns.rs @@ -0,0 +1,433 @@ +use super::*; + +impl<'a> Parser<'a> { + pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { + let mut columns = vec![]; + let mut constraints = vec![]; + if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { + return Ok((columns, constraints)); + } + + loop { + if let Some(constraint) = self.parse_optional_table_constraint()? { + constraints.push(constraint); + } else if let Token::Word(_) = self.peek_token().token { + columns.push(self.parse_column_def()?); + } else { + return self.expected("column name or constraint definition", self.peek_token()); + } + + let comma = self.consume_token(&Token::Comma); + let rparen = self.peek_token().token == Token::RParen; + + if !comma && !rparen { + return self.expected("',' or ')' after column definition", self.peek_token()); + }; + + if rparen && (!comma || self.options.trailing_commas) { + let _ = self.consume_token(&Token::RParen); + break; + } + } + + Ok((columns, constraints)) + } + + pub fn parse_column_def(&mut self) -> Result { + let name = self.parse_identifier(false)?; + let data_type = if self.is_column_type_sqlite_unspecified() { + DataType::Unspecified + } else { + self.parse_data_type()? + }; + let mut collation = if self.parse_keyword(Keyword::COLLATE) { + Some(self.parse_object_name(false)?) 
+ } else { + None + }; + let mut options = vec![]; + loop { + if self.parse_keyword(Keyword::CONSTRAINT) { + let name = Some(self.parse_identifier(false)?); + if let Some(option) = self.parse_optional_column_option()? { + options.push(ColumnOptionDef { name, option }); + } else { + return self.expected( + "constraint details after CONSTRAINT ", + self.peek_token(), + ); + } + } else if let Some(option) = self.parse_optional_column_option()? { + options.push(ColumnOptionDef { name: None, option }); + } else if dialect_of!(self is MySqlDialect | SnowflakeDialect | GenericDialect) + && self.parse_keyword(Keyword::COLLATE) + { + collation = Some(self.parse_object_name(false)?); + } else { + break; + }; + } + Ok(ColumnDef { + name, + data_type, + collation, + options, + }) + } + + fn is_column_type_sqlite_unspecified(&mut self) -> bool { + if dialect_of!(self is SQLiteDialect) { + match self.peek_token().token { + Token::Word(word) => matches!( + word.keyword, + Keyword::CONSTRAINT + | Keyword::PRIMARY + | Keyword::NOT + | Keyword::UNIQUE + | Keyword::CHECK + | Keyword::DEFAULT + | Keyword::COLLATE + | Keyword::REFERENCES + | Keyword::GENERATED + | Keyword::AS + ), + _ => true, // e.g. comma immediately after column name + } + } else { + false + } + } + + pub fn parse_optional_column_option(&mut self) -> Result, ParserError> { + if let Some(option) = self.dialect.parse_column_option(self)? { + return option; + } + + if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { + Ok(Some(ColumnOption::CharacterSet( + self.parse_object_name(false)?, + ))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { + Ok(Some(ColumnOption::NotNull)) + } else if self.parse_keywords(&[Keyword::COMMENT]) { + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(value, ..) => Ok(Some(ColumnOption::Comment(value))), + _ => self.expected("string", next_token), + } + } else if self.parse_keyword(Keyword::NULL) { + Ok(Some(ColumnOption::Null)) + } else if self.parse_keyword(Keyword::DEFAULT) { + Ok(Some(ColumnOption::Default(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::MATERIALIZED) + { + Ok(Some(ColumnOption::Materialized(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::ALIAS) + { + Ok(Some(ColumnOption::Alias(self.parse_expr()?))) + } else if dialect_of!(self is ClickHouseDialect| GenericDialect) + && self.parse_keyword(Keyword::EPHEMERAL) + { + // The expression is optional for the EPHEMERAL syntax, so we need to check + // if the column definition has remaining tokens before parsing the expression. 
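+            // For example, `x UInt32 EPHEMERAL,` ends here with Ephemeral(None).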
+ if matches!(self.peek_token().token, Token::Comma | Token::RParen) { + Ok(Some(ColumnOption::Ephemeral(None))) + } else { + Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) + } + } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some(ColumnOption::Unique { + is_primary: true, + characteristics, + })) + } else if self.parse_keyword(Keyword::UNIQUE) { + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some(ColumnOption::Unique { + is_primary: false, + characteristics, + })) + } else if self.parse_keyword(Keyword::REFERENCES) { + let foreign_table = self.parse_object_name(false)?; + // PostgreSQL allows omitting the column list and + // uses the primary key column of the foreign table by default + let referred_columns = self.parse_parenthesized_column_list(Optional, false)?; + let mut on_delete = None; + let mut on_update = None; + loop { + if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { + on_delete = Some(self.parse_referential_action()?); + } else if on_update.is_none() + && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) + { + on_update = Some(self.parse_referential_action()?); + } else { + break; + } + } + let characteristics = self.parse_constraint_characteristics()?; + + Ok(Some(ColumnOption::ForeignKey { + foreign_table, + referred_columns, + on_delete, + on_update, + characteristics, + })) + } else if self.parse_keyword(Keyword::CHECK) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Some(ColumnOption::Check(expr))) + } else if self.parse_keyword(Keyword::AUTO_INCREMENT) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + // Support AUTO_INCREMENT for MySQL + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("AUTO_INCREMENT"), + ]))) + } else if self.parse_keyword(Keyword::AUTOINCREMENT) + && dialect_of!(self is SQLiteDialect | GenericDialect) + { + // Support AUTOINCREMENT for SQLite + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("AUTOINCREMENT"), + ]))) + } else if self.parse_keyword(Keyword::ASC) + && self.dialect.supports_asc_desc_in_column_definition() + { + // Support ASC for SQLite + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("ASC"), + ]))) + } else if self.parse_keyword(Keyword::DESC) + && self.dialect.supports_asc_desc_in_column_definition() + { + // Support DESC for SQLite + Ok(Some(ColumnOption::DialectSpecific(vec![ + Token::make_keyword("DESC"), + ]))) + } else if self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + let expr = self.parse_expr()?; + Ok(Some(ColumnOption::OnUpdate(expr))) + } else if self.parse_keyword(Keyword::GENERATED) { + self.parse_optional_column_option_generated() + } else if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.parse_keyword(Keyword::OPTIONS) + { + self.prev_token(); + Ok(Some(ColumnOption::Options( + self.parse_options(Keyword::OPTIONS)?, + ))) + } else if self.parse_keyword(Keyword::AS) + && dialect_of!(self is MySqlDialect | SQLiteDialect | DuckDbDialect | GenericDialect) + { + self.parse_optional_column_option_as() + } else if self.parse_keyword(Keyword::IDENTITY) + && dialect_of!(self is MsSqlDialect | GenericDialect) + { + let parameters = if self.consume_token(&Token::LParen) { + let seed = self.parse_number()?; + self.expect_token(&Token::Comma)?; + let increment 
= self.parse_number()?; + self.expect_token(&Token::RParen)?; + + Some(IdentityPropertyFormatKind::FunctionCall( + IdentityParameters { seed, increment }, + )) + } else { + None + }; + Ok(Some(ColumnOption::Identity( + IdentityPropertyKind::Identity(IdentityProperty { + parameters, + order: None, + }), + ))) + } else if dialect_of!(self is SQLiteDialect | GenericDialect) + && self.parse_keywords(&[Keyword::ON, Keyword::CONFLICT]) + { + // Support ON CONFLICT for SQLite + Ok(Some(ColumnOption::OnConflict( + self.expect_one_of_keywords(&[ + Keyword::ROLLBACK, + Keyword::ABORT, + Keyword::FAIL, + Keyword::IGNORE, + Keyword::REPLACE, + ])?, + ))) + } else { + Ok(None) + } + } + + pub(crate) fn parse_tag(&mut self) -> Result { + let name = self.parse_identifier(false)?; + self.expect_token(&Token::Eq)?; + let value = self.parse_literal_string()?; + + Ok(Tag::new(name, value)) + } + + fn parse_optional_column_option_generated( + &mut self, + ) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS, Keyword::IDENTITY]) { + let mut sequence_options = vec![]; + if self.expect_token(&Token::LParen).is_ok() { + sequence_options = self.parse_create_sequence_options()?; + self.expect_token(&Token::RParen)?; + } + Ok(Some(ColumnOption::Generated { + generated_as: GeneratedAs::Always, + sequence_options: Some(sequence_options), + generation_expr: None, + generation_expr_mode: None, + generated_keyword: true, + })) + } else if self.parse_keywords(&[ + Keyword::BY, + Keyword::DEFAULT, + Keyword::AS, + Keyword::IDENTITY, + ]) { + let mut sequence_options = vec![]; + if self.expect_token(&Token::LParen).is_ok() { + sequence_options = self.parse_create_sequence_options()?; + self.expect_token(&Token::RParen)?; + } + Ok(Some(ColumnOption::Generated { + generated_as: GeneratedAs::ByDefault, + sequence_options: Some(sequence_options), + generation_expr: None, + generation_expr_mode: None, + generated_keyword: true, + })) + } else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS]) { + if self.expect_token(&Token::LParen).is_ok() { + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + let (gen_as, expr_mode) = if self.parse_keywords(&[Keyword::STORED]) { + Ok(( + GeneratedAs::ExpStored, + Some(GeneratedExpressionMode::Stored), + )) + } else if dialect_of!(self is PostgreSqlDialect) { + // Postgres' AS IDENTITY branches are above, this one needs STORED + self.expected("STORED", self.peek_token()) + } else if self.parse_keywords(&[Keyword::VIRTUAL]) { + Ok((GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual))) + } else { + Ok((GeneratedAs::Always, None)) + }?; + + Ok(Some(ColumnOption::Generated { + generated_as: gen_as, + sequence_options: None, + generation_expr: Some(expr), + generation_expr_mode: expr_mode, + generated_keyword: true, + })) + } else { + Ok(None) + } + } else { + Ok(None) + } + } + + fn parse_optional_column_option_as(&mut self) -> Result, ParserError> { + // Some DBs allow 'AS (expr)', shorthand for GENERATED ALWAYS AS + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + + let (gen_as, expr_mode) = if self.parse_keywords(&[Keyword::STORED]) { + ( + GeneratedAs::ExpStored, + Some(GeneratedExpressionMode::Stored), + ) + } else if self.parse_keywords(&[Keyword::VIRTUAL]) { + (GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual)) + } else { + (GeneratedAs::Always, None) + }; + + Ok(Some(ColumnOption::Generated { + generated_as: gen_as, + sequence_options: None, + 
generation_expr: Some(expr), + generation_expr_mode: expr_mode, + generated_keyword: false, + })) + } + + /// Parses a parenthesized, comma-separated list of column definitions within a view. + pub(crate) fn parse_view_columns(&mut self) -> Result, ParserError> { + if self.consume_token(&Token::LParen) { + if self.peek_token().token == Token::RParen { + self.next_token(); + Ok(vec![]) + } else { + let cols = self.parse_comma_separated(Parser::parse_view_column)?; + self.expect_token(&Token::RParen)?; + Ok(cols) + } + } else { + Ok(vec![]) + } + } + + /// Parses a column definition within a view. + fn parse_view_column(&mut self) -> Result { + let name = self.parse_identifier(false)?; + let options = if (dialect_of!(self is BigQueryDialect | GenericDialect) + && self.parse_keyword(Keyword::OPTIONS)) + || (dialect_of!(self is SnowflakeDialect | GenericDialect) + && self.parse_keyword(Keyword::COMMENT)) + { + self.prev_token(); + self.parse_optional_column_option()? + .map(|option| vec![option]) + } else { + None + }; + let data_type = if dialect_of!(self is ClickHouseDialect) { + Some(self.parse_data_type()?) + } else { + None + }; + Ok(ViewColumnDef { + name, + data_type, + options, + }) + } + + /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers + pub fn parse_parenthesized_column_list( + &mut self, + optional: IsOptional, + allow_empty: bool, + ) -> Result, ParserError> { + if self.consume_token(&Token::LParen) { + if allow_empty && self.peek_token().token == Token::RParen { + self.next_token(); + Ok(vec![]) + } else { + let cols = self.parse_comma_separated(|p| p.parse_identifier(false))?; + self.expect_token(&Token::RParen)?; + Ok(cols) + } + } else if optional == Optional { + Ok(vec![]) + } else { + self.expected("a list of columns in parentheses", self.peek_token()) + } + } +} diff --git a/src/parser/comment.rs b/src/parser/comment.rs new file mode 100644 index 000000000..8c5bca13e --- /dev/null +++ b/src/parser/comment.rs @@ -0,0 +1,48 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_comment(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + + self.expect_keyword(Keyword::ON)?; + let token = self.next_token(); + + let (object_type, object_name) = match token.token { + Token::Word(w) if w.keyword == Keyword::COLUMN => { + (CommentObject::Column, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::TABLE => { + (CommentObject::Table, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::EXTENSION => { + (CommentObject::Extension, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::SCHEMA => { + (CommentObject::Schema, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::DATABASE => { + (CommentObject::Database, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::USER => { + (CommentObject::User, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::ROLE => { + (CommentObject::Role, self.parse_object_name(false)?) + } + _ => self.expected("comment object_type", token)?, + }; + + self.expect_keyword(Keyword::IS)?; + let comment = if self.parse_keyword(Keyword::NULL) { + None + } else { + Some(self.parse_literal_string()?) 
+ }; + Ok(Statement::Comment { + object_type, + object_name, + comment, + if_exists, + }) + } +} diff --git a/src/parser/commit.rs b/src/parser/commit.rs new file mode 100644 index 000000000..900649174 --- /dev/null +++ b/src/parser/commit.rs @@ -0,0 +1,9 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_commit(&mut self) -> Result { + Ok(Statement::Commit { + chain: self.parse_commit_rollback_chain()?, + }) + } +} diff --git a/src/parser/copy.rs b/src/parser/copy.rs new file mode 100644 index 000000000..3310bd510 --- /dev/null +++ b/src/parser/copy.rs @@ -0,0 +1,224 @@ +use super::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + /// Parse a copy statement + pub fn parse_copy(&mut self) -> Result { + let source; + if self.consume_token(&Token::LParen) { + source = CopySource::Query(self.parse_query()?); + self.expect_token(&Token::RParen)?; + } else { + let table_name = self.parse_object_name(false)?; + let columns = self.parse_parenthesized_column_list(Optional, false)?; + source = CopySource::Table { + table_name, + columns, + }; + } + let to = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::TO]) { + Some(Keyword::FROM) => false, + Some(Keyword::TO) => true, + _ => self.expected("FROM or TO", self.peek_token())?, + }; + if !to { + // Use a separate if statement to prevent Rust compiler from complaining about + // "if statement in this position is unstable: https://github.com/rust-lang/rust/issues/53667" + if let CopySource::Query(_) = source { + return Err(ParserError::ParserError( + "COPY ... FROM does not support query as a source".to_string(), + )); + } + } + let target = if self.parse_keyword(Keyword::STDIN) { + CopyTarget::Stdin + } else if self.parse_keyword(Keyword::STDOUT) { + CopyTarget::Stdout + } else if self.parse_keyword(Keyword::PROGRAM) { + CopyTarget::Program { + command: self.parse_literal_string()?, + } + } else { + CopyTarget::File { + filename: self.parse_literal_string()?, + } + }; + let _ = self.parse_keyword(Keyword::WITH); // [ WITH ] + let mut options = vec![]; + if self.consume_token(&Token::LParen) { + options = self.parse_comma_separated(Parser::parse_copy_option)?; + self.expect_token(&Token::RParen)?; + } + let mut legacy_options = vec![]; + while let Some(opt) = self.maybe_parse(|parser| parser.parse_copy_legacy_option())? 
{ + legacy_options.push(opt); + } + let values = if let CopyTarget::Stdin = target { + self.expect_token(&Token::SemiColon)?; + self.parse_tsv() + } else { + vec![] + }; + Ok(Statement::Copy { + source, + to, + target, + options, + legacy_options, + values, + }) + } + + fn parse_copy_option(&mut self) -> Result { + let ret = match self.parse_one_of_keywords(&[ + Keyword::FORMAT, + Keyword::FREEZE, + Keyword::DELIMITER, + Keyword::NULL, + Keyword::HEADER, + Keyword::QUOTE, + Keyword::ESCAPE, + Keyword::FORCE_QUOTE, + Keyword::FORCE_NOT_NULL, + Keyword::FORCE_NULL, + Keyword::ENCODING, + ]) { + Some(Keyword::FORMAT) => CopyOption::Format(self.parse_identifier(false)?), + Some(Keyword::FREEZE) => CopyOption::Freeze(!matches!( + self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]), + Some(Keyword::FALSE) + )), + Some(Keyword::DELIMITER) => CopyOption::Delimiter(self.parse_literal_char()?), + Some(Keyword::NULL) => CopyOption::Null(self.parse_literal_string()?), + Some(Keyword::HEADER) => CopyOption::Header(!matches!( + self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]), + Some(Keyword::FALSE) + )), + Some(Keyword::QUOTE) => CopyOption::Quote(self.parse_literal_char()?), + Some(Keyword::ESCAPE) => CopyOption::Escape(self.parse_literal_char()?), + Some(Keyword::FORCE_QUOTE) => { + CopyOption::ForceQuote(self.parse_parenthesized_column_list(Mandatory, false)?) + } + Some(Keyword::FORCE_NOT_NULL) => { + CopyOption::ForceNotNull(self.parse_parenthesized_column_list(Mandatory, false)?) + } + Some(Keyword::FORCE_NULL) => { + CopyOption::ForceNull(self.parse_parenthesized_column_list(Mandatory, false)?) + } + Some(Keyword::ENCODING) => CopyOption::Encoding(self.parse_literal_string()?), + _ => self.expected("option", self.peek_token())?, + }; + Ok(ret) + } + + fn parse_copy_legacy_option(&mut self) -> Result { + let ret = match self.parse_one_of_keywords(&[ + Keyword::BINARY, + Keyword::DELIMITER, + Keyword::NULL, + Keyword::CSV, + ]) { + Some(Keyword::BINARY) => CopyLegacyOption::Binary, + Some(Keyword::DELIMITER) => { + let _ = self.parse_keyword(Keyword::AS); // [ AS ] + CopyLegacyOption::Delimiter(self.parse_literal_char()?) + } + Some(Keyword::NULL) => { + let _ = self.parse_keyword(Keyword::AS); // [ AS ] + CopyLegacyOption::Null(self.parse_literal_string()?) + } + Some(Keyword::CSV) => CopyLegacyOption::Csv({ + let mut opts = vec![]; + while let Some(opt) = + self.maybe_parse(|parser| parser.parse_copy_legacy_csv_option())? + { + opts.push(opt); + } + opts + }), + _ => self.expected("option", self.peek_token())?, + }; + Ok(ret) + } + + fn parse_copy_legacy_csv_option(&mut self) -> Result { + let ret = match self.parse_one_of_keywords(&[ + Keyword::HEADER, + Keyword::QUOTE, + Keyword::ESCAPE, + Keyword::FORCE, + ]) { + Some(Keyword::HEADER) => CopyLegacyCsvOption::Header, + Some(Keyword::QUOTE) => { + let _ = self.parse_keyword(Keyword::AS); // [ AS ] + CopyLegacyCsvOption::Quote(self.parse_literal_char()?) + } + Some(Keyword::ESCAPE) => { + let _ = self.parse_keyword(Keyword::AS); // [ AS ] + CopyLegacyCsvOption::Escape(self.parse_literal_char()?) 
+ } + Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) => { + CopyLegacyCsvOption::ForceNotNull( + self.parse_comma_separated(|p| p.parse_identifier(false))?, + ) + } + Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::QUOTE]) => { + CopyLegacyCsvOption::ForceQuote( + self.parse_comma_separated(|p| p.parse_identifier(false))?, + ) + } + _ => self.expected("csv option", self.peek_token())?, + }; + Ok(ret) + } + + fn parse_literal_char(&mut self) -> Result<char, ParserError> { + let s = self.parse_literal_string()?; + if s.len() != 1 { + let loc = self + .tokens + .get(self.index - 1) + .map_or(Location { line: 0, column: 0 }, |t| t.span.start); + return parser_err!(format!("Expected a char, found {s:?}"), loc); + } + Ok(s.chars().next().unwrap()) + } + + /// Parse tab-separated values in a + /// COPY payload + pub fn parse_tsv(&mut self) -> Vec<Option<String>> { + self.parse_tab_value() + } + + pub fn parse_tab_value(&mut self) -> Vec<Option<String>> { + let mut values = vec![]; + let mut content = String::from(""); + while let Some(t) = self.next_token_no_skip().map(|t| &t.token) { + match t { + Token::Whitespace(Whitespace::Tab) => { + values.push(Some(content.to_string())); + content.clear(); + } + Token::Whitespace(Whitespace::Newline) => { + values.push(Some(content.to_string())); + content.clear(); + } + Token::Backslash => { + if self.consume_token(&Token::Period) { + return values; + } + if let Token::Word(w) = self.next_token().token { + if w.value == "N" { + values.push(None); + } + } + } + _ => { + content.push_str(&t.to_string()); + } + } + } + values + } +} diff --git a/src/parser/create.rs b/src/parser/create.rs new file mode 100644 index 000000000..91f618dbc --- /dev/null +++ b/src/parser/create.rs @@ -0,0 +1,1669 @@ +use super::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + /// Parse a SQL CREATE statement + pub fn parse_create(&mut self) -> Result<Statement, ParserError> { + let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); + let or_alter = self.parse_keywords(&[Keyword::OR, Keyword::ALTER]); + let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); + let global = self.parse_one_of_keywords(&[Keyword::GLOBAL]).is_some(); + let transient = self.parse_one_of_keywords(&[Keyword::TRANSIENT]).is_some(); + let global: Option<bool> = if global { + Some(true) + } else if local { + Some(false) + } else { + None + }; + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); + let persistent = dialect_of!(self is DuckDbDialect) + && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); + if self.parse_keyword(Keyword::TABLE) { + self.parse_create_table(or_replace, temporary, global, transient) + } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { + self.prev_token(); + self.parse_create_view(or_replace, temporary) + } else if self.parse_keyword(Keyword::POLICY) { + self.parse_create_policy() + } else if self.parse_keyword(Keyword::EXTERNAL) { + self.parse_create_external_table(or_replace) + } else if self.parse_keyword(Keyword::FUNCTION) { + self.parse_create_function(or_replace, temporary) + } else if self.parse_keyword(Keyword::TRIGGER) { + self.parse_create_trigger(or_replace, false) + } else if self.parse_keywords(&[Keyword::CONSTRAINT, Keyword::TRIGGER]) { + self.parse_create_trigger(or_replace, true) + } else if self.parse_keyword(Keyword::MACRO) { + self.parse_create_macro(or_replace, temporary) + } else if self.parse_keyword(Keyword::SECRET) { + self.parse_create_secret(or_replace, temporary,
persistent) + } else if or_replace { + self.expected( + "[EXTERNAL] TABLE or [MATERIALIZED] VIEW or FUNCTION after CREATE OR REPLACE", + self.peek_token(), + ) + } else if self.parse_keyword(Keyword::EXTENSION) { + self.parse_create_extension() + } else if self.parse_keyword(Keyword::INDEX) { + self.parse_create_index(false) + } else if self.parse_keywords(&[Keyword::UNIQUE, Keyword::INDEX]) { + self.parse_create_index(true) + } else if self.parse_keyword(Keyword::VIRTUAL) { + self.parse_create_virtual_table() + } else if self.parse_keyword(Keyword::SCHEMA) { + self.parse_create_schema() + } else if self.parse_keyword(Keyword::DATABASE) { + self.parse_create_database() + } else if self.parse_keyword(Keyword::ROLE) { + self.parse_create_role() + } else if self.parse_keyword(Keyword::SEQUENCE) { + self.parse_create_sequence(temporary) + } else if self.parse_keyword(Keyword::TYPE) { + self.parse_create_type() + } else if self.parse_keyword(Keyword::PROCEDURE) { + self.parse_create_procedure(or_alter) + } else { + self.expected("an object type after CREATE", self.peek_token()) + } + } + + pub fn parse_create_procedure(&mut self, or_alter: bool) -> Result { + let name = self.parse_object_name(false)?; + let params = self.parse_optional_procedure_parameters()?; + self.expect_keyword(Keyword::AS)?; + self.expect_keyword(Keyword::BEGIN)?; + let statements = self.parse_statements()?; + self.expect_keyword(Keyword::END)?; + Ok(Statement::CreateProcedure { + name, + or_alter, + params, + body: statements, + }) + } + + /// ```sql + /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] + /// ``` + /// + /// See [Postgres docs](https://www.postgresql.org/docs/current/sql-createsequence.html) for more details. + pub fn parse_create_sequence(&mut self, temporary: bool) -> Result { + //[ IF NOT EXISTS ] + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + //name + let name = self.parse_object_name(false)?; + //[ AS data_type ] + let mut data_type: Option = None; + if self.parse_keywords(&[Keyword::AS]) { + data_type = Some(self.parse_data_type()?) + } + let sequence_options = self.parse_create_sequence_options()?; + // [ OWNED BY { table_name.column_name | NONE } ] + let owned_by = if self.parse_keywords(&[Keyword::OWNED, Keyword::BY]) { + if self.parse_keywords(&[Keyword::NONE]) { + Some(ObjectName(vec![Ident::new("NONE")])) + } else { + Some(self.parse_object_name(false)?) + } + } else { + None + }; + Ok(Statement::CreateSequence { + temporary, + if_not_exists, + name, + data_type, + sequence_options, + owned_by, + }) + } + + pub fn parse_create_schema(&mut self) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + + let schema_name = self.parse_schema_name()?; + + Ok(Statement::CreateSchema { + schema_name, + if_not_exists, + }) + } + + pub fn parse_create_type(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::AS)?; + + let mut attributes = vec![]; + if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { + return Ok(Statement::CreateType { + name, + representation: UserDefinedTypeRepresentation::Composite { attributes }, + }); + } + + loop { + let attr_name = self.parse_identifier(false)?; + let attr_data_type = self.parse_data_type()?; + let attr_collation = if self.parse_keyword(Keyword::COLLATE) { + Some(self.parse_object_name(false)?) 
+ } else { + None + }; + attributes.push(UserDefinedTypeCompositeAttributeDef { + name: attr_name, + data_type: attr_data_type, + collation: attr_collation, + }); + let comma = self.consume_token(&Token::Comma); + if self.consume_token(&Token::RParen) { + // allow a trailing comma + break; + } else if !comma { + return self.expected("',' or ')' after attribute definition", self.peek_token()); + } + } + + Ok(Statement::CreateType { + name, + representation: UserDefinedTypeRepresentation::Composite { attributes }, + }) + } + + pub fn parse_index_options(&mut self) -> Result, ParserError> { + let mut options = Vec::new(); + + loop { + match self.parse_optional_index_option()? { + Some(index_option) => options.push(index_option), + None => return Ok(options), + } + } + } + + pub fn parse_index_type(&mut self) -> Result { + if self.parse_keyword(Keyword::BTREE) { + Ok(IndexType::BTree) + } else if self.parse_keyword(Keyword::HASH) { + Ok(IndexType::Hash) + } else { + self.expected("index type {BTREE | HASH}", self.peek_token()) + } + } + + #[must_use] + pub fn parse_index_type_display(&mut self) -> KeyOrIndexDisplay { + if self.parse_keyword(Keyword::KEY) { + KeyOrIndexDisplay::Key + } else if self.parse_keyword(Keyword::INDEX) { + KeyOrIndexDisplay::Index + } else { + KeyOrIndexDisplay::None + } + } + + pub fn parse_optional_index_option(&mut self) -> Result, ParserError> { + if let Some(index_type) = self.parse_optional_using_then_index_type()? { + Ok(Some(IndexOption::Using(index_type))) + } else if self.parse_keyword(Keyword::COMMENT) { + let s = self.parse_literal_string()?; + Ok(Some(IndexOption::Comment(s))) + } else { + Ok(None) + } + } + + pub fn parse_optional_inline_comment(&mut self) -> Result, ParserError> { + let comment = if self.parse_keyword(Keyword::COMMENT) { + let has_eq = self.consume_token(&Token::Eq); + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(if has_eq { + CommentDef::WithEq(str) + } else { + CommentDef::WithoutEq(str) + }), + _ => self.expected("comment", next_token)?, + } + } else { + None + }; + Ok(comment) + } + + pub fn parse_optional_procedure_parameters( + &mut self, + ) -> Result>, ParserError> { + let mut params = vec![]; + if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { + return Ok(Some(params)); + } + loop { + if let Token::Word(_) = self.peek_token().token { + params.push(self.parse_procedure_param()?) 
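+ // Illustrative only: each parameter is a bare `name data_type` pair, as in + // CREATE PROCEDURE p (a INT, b VARCHAR(10)) AS BEGIN ... END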
+ } + let comma = self.consume_token(&Token::Comma); + if self.consume_token(&Token::RParen) { + // allow a trailing comma, even though it's not in standard + break; + } else if !comma { + return self.expected("',' or ')' after parameter definition", self.peek_token()); + } + } + Ok(Some(params)) + } + + /// Parse [USING {BTREE | HASH}] + pub fn parse_optional_using_then_index_type( + &mut self, + ) -> Result, ParserError> { + if self.parse_keyword(Keyword::USING) { + Ok(Some(self.parse_index_type()?)) + } else { + Ok(None) + } + } + + pub fn parse_procedure_param(&mut self) -> Result { + let name = self.parse_identifier(false)?; + let data_type = self.parse_data_type()?; + Ok(ProcedureParam { name, data_type }) + } + + pub fn parse_schema_name(&mut self) -> Result { + if self.parse_keyword(Keyword::AUTHORIZATION) { + Ok(SchemaName::UnnamedAuthorization( + self.parse_identifier(false)?, + )) + } else { + let name = self.parse_object_name(false)?; + + if self.parse_keyword(Keyword::AUTHORIZATION) { + Ok(SchemaName::NamedAuthorization( + name, + self.parse_identifier(false)?, + )) + } else { + Ok(SchemaName::Simple(name)) + } + } + } + + pub fn parse_create_database(&mut self) -> Result { + let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let db_name = self.parse_object_name(false)?; + let mut location = None; + let mut managed_location = None; + loop { + match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { + Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), + Some(Keyword::MANAGEDLOCATION) => { + managed_location = Some(self.parse_literal_string()?) + } + _ => break, + } + } + Ok(Statement::CreateDatabase { + db_name, + if_not_exists: ine, + location, + managed_location, + }) + } + + pub fn parse_optional_create_function_using( + &mut self, + ) -> Result, ParserError> { + if !self.parse_keyword(Keyword::USING) { + return Ok(None); + }; + let keyword = + self.expect_one_of_keywords(&[Keyword::JAR, Keyword::FILE, Keyword::ARCHIVE])?; + + let uri = self.parse_literal_string()?; + + match keyword { + Keyword::JAR => Ok(Some(CreateFunctionUsing::Jar(uri))), + Keyword::FILE => Ok(Some(CreateFunctionUsing::File(uri))), + Keyword::ARCHIVE => Ok(Some(CreateFunctionUsing::Archive(uri))), + _ => self.expected( + "JAR, FILE or ARCHIVE, got {:?}", + TokenWithSpan::wrap(Token::make_keyword(format!("{keyword:?}").as_str())), + ), + } + } + + pub fn parse_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + if dialect_of!(self is HiveDialect) { + self.parse_hive_create_function(or_replace, temporary) + } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) { + self.parse_postgres_create_function(or_replace, temporary) + } else if dialect_of!(self is DuckDbDialect) { + self.parse_create_macro(or_replace, temporary) + } else if dialect_of!(self is BigQueryDialect) { + self.parse_bigquery_create_function(or_replace, temporary) + } else { + self.prev_token(); + self.expected("an object type after CREATE", self.peek_token()) + } + } + + pub fn parse_create_trigger( + &mut self, + or_replace: bool, + is_constraint: bool, + ) -> Result { + if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { + self.prev_token(); + return self.expected("an object type after CREATE", self.peek_token()); + } + + let name = self.parse_object_name(false)?; + let period = self.parse_trigger_period()?; + + let events = self.parse_keyword_separated(Keyword::OR, Parser::parse_trigger_event)?; + 
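+ // For orientation, a sketch of the Postgres-style statement assembled by the remaining steps below (clause order mirrors the code): + // CREATE TRIGGER check_update BEFORE UPDATE ON accounts + // FOR EACH ROW WHEN (OLD.balance <> NEW.balance) + // EXECUTE FUNCTION check_account_update();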
self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + + let referenced_table_name = if self.parse_keyword(Keyword::FROM) { + self.parse_object_name(true).ok() + } else { + None + }; + + let characteristics = self.parse_constraint_characteristics()?; + + let mut referencing = vec![]; + if self.parse_keyword(Keyword::REFERENCING) { + while let Some(refer) = self.parse_trigger_referencing()? { + referencing.push(refer); + } + } + + self.expect_keyword(Keyword::FOR)?; + let include_each = self.parse_keyword(Keyword::EACH); + let trigger_object = + match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? { + Keyword::ROW => TriggerObject::Row, + Keyword::STATEMENT => TriggerObject::Statement, + _ => unreachable!(), + }; + + let condition = self + .parse_keyword(Keyword::WHEN) + .then(|| self.parse_expr()) + .transpose()?; + + self.expect_keyword(Keyword::EXECUTE)?; + + let exec_body = self.parse_trigger_exec_body()?; + + Ok(Statement::CreateTrigger { + or_replace, + is_constraint, + name, + period, + events, + table_name, + referenced_table_name, + referencing, + trigger_object, + include_each, + condition, + exec_body, + characteristics, + }) + } + + pub fn parse_trigger_period(&mut self) -> Result { + Ok( + match self.expect_one_of_keywords(&[ + Keyword::BEFORE, + Keyword::AFTER, + Keyword::INSTEAD, + ])? { + Keyword::BEFORE => TriggerPeriod::Before, + Keyword::AFTER => TriggerPeriod::After, + Keyword::INSTEAD => self + .expect_keyword(Keyword::OF) + .map(|_| TriggerPeriod::InsteadOf)?, + _ => unreachable!(), + }, + ) + } + + pub fn parse_trigger_event(&mut self) -> Result { + Ok( + match self.expect_one_of_keywords(&[ + Keyword::INSERT, + Keyword::UPDATE, + Keyword::DELETE, + Keyword::TRUNCATE, + ])? { + Keyword::INSERT => TriggerEvent::Insert, + Keyword::UPDATE => { + if self.parse_keyword(Keyword::OF) { + let cols = self.parse_comma_separated(|ident| { + Parser::parse_identifier(ident, false) + })?; + TriggerEvent::Update(cols) + } else { + TriggerEvent::Update(vec![]) + } + } + Keyword::DELETE => TriggerEvent::Delete, + Keyword::TRUNCATE => TriggerEvent::Truncate, + _ => unreachable!(), + }, + ) + } + + pub fn parse_trigger_referencing(&mut self) -> Result, ParserError> { + let refer_type = match self.parse_one_of_keywords(&[Keyword::OLD, Keyword::NEW]) { + Some(Keyword::OLD) if self.parse_keyword(Keyword::TABLE) => { + TriggerReferencingType::OldTable + } + Some(Keyword::NEW) if self.parse_keyword(Keyword::TABLE) => { + TriggerReferencingType::NewTable + } + _ => { + return Ok(None); + } + }; + + let is_as = self.parse_keyword(Keyword::AS); + let transition_relation_name = self.parse_object_name(false)?; + Ok(Some(TriggerReferencing { + refer_type, + is_as, + transition_relation_name, + })) + } + + pub fn parse_trigger_exec_body(&mut self) -> Result { + Ok(TriggerExecBody { + exec_type: match self + .expect_one_of_keywords(&[Keyword::FUNCTION, Keyword::PROCEDURE])? 
+ { + Keyword::FUNCTION => TriggerExecBodyType::Function, + Keyword::PROCEDURE => TriggerExecBodyType::Procedure, + _ => unreachable!(), + }, + func_desc: self.parse_function_desc()?, + }) + } + + pub fn parse_create_macro( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + if dialect_of!(self is DuckDbDialect | GenericDialect) { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::LParen)?; + let args = if self.consume_token(&Token::RParen) { + self.prev_token(); + None + } else { + Some(self.parse_comma_separated(Parser::parse_macro_arg)?) + }; + + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::AS)?; + + Ok(Statement::CreateMacro { + or_replace, + temporary, + name, + args, + definition: if self.parse_keyword(Keyword::TABLE) { + MacroDefinition::Table(self.parse_query()?) + } else { + MacroDefinition::Expr(self.parse_expr()?) + }, + }) + } else { + self.prev_token(); + self.expected("an object type after CREATE", self.peek_token()) + } + } + + fn parse_macro_arg(&mut self) -> Result { + let name = self.parse_identifier(false)?; + + let default_expr = + if self.consume_token(&Token::Assignment) || self.consume_token(&Token::RArrow) { + Some(self.parse_expr()?) + } else { + None + }; + Ok(MacroArg { name, default_expr }) + } + + pub(crate) fn parse_create_sequence_options( + &mut self, + ) -> Result, ParserError> { + let mut sequence_options = vec![]; + //[ INCREMENT [ BY ] increment ] + if self.parse_keywords(&[Keyword::INCREMENT]) { + if self.parse_keywords(&[Keyword::BY]) { + sequence_options.push(SequenceOptions::IncrementBy(self.parse_number()?, true)); + } else { + sequence_options.push(SequenceOptions::IncrementBy(self.parse_number()?, false)); + } + } + //[ MINVALUE minvalue | NO MINVALUE ] + if self.parse_keyword(Keyword::MINVALUE) { + sequence_options.push(SequenceOptions::MinValue(Some(self.parse_number()?))); + } else if self.parse_keywords(&[Keyword::NO, Keyword::MINVALUE]) { + sequence_options.push(SequenceOptions::MinValue(None)); + } + //[ MAXVALUE maxvalue | NO MAXVALUE ] + if self.parse_keywords(&[Keyword::MAXVALUE]) { + sequence_options.push(SequenceOptions::MaxValue(Some(self.parse_number()?))); + } else if self.parse_keywords(&[Keyword::NO, Keyword::MAXVALUE]) { + sequence_options.push(SequenceOptions::MaxValue(None)); + } + + //[ START [ WITH ] start ] + if self.parse_keywords(&[Keyword::START]) { + if self.parse_keywords(&[Keyword::WITH]) { + sequence_options.push(SequenceOptions::StartWith(self.parse_number()?, true)); + } else { + sequence_options.push(SequenceOptions::StartWith(self.parse_number()?, false)); + } + } + //[ CACHE cache ] + if self.parse_keywords(&[Keyword::CACHE]) { + sequence_options.push(SequenceOptions::Cache(self.parse_number()?)); + } + // [ [ NO ] CYCLE ] + if self.parse_keywords(&[Keyword::NO, Keyword::CYCLE]) { + sequence_options.push(SequenceOptions::Cycle(true)); + } else if self.parse_keywords(&[Keyword::CYCLE]) { + sequence_options.push(SequenceOptions::Cycle(false)); + } + + Ok(sequence_options) + } + + pub fn parse_create_external_table( + &mut self, + or_replace: bool, + ) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name(false)?; + let (columns, constraints) = self.parse_columns()?; + + let hive_distribution = self.parse_hive_distribution()?; + let hive_formats = self.parse_hive_formats()?; + + let file_format = if let Some(ff) = 
&hive_formats.storage { + match ff { + HiveIOFormat::FileFormat { format } => Some(*format), + _ => None, + } + } else { + None + }; + let location = hive_formats.location.clone(); + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; + Ok(CreateTableBuilder::new(table_name) + .columns(columns) + .constraints(constraints) + .hive_distribution(hive_distribution) + .hive_formats(Some(hive_formats)) + .table_properties(table_properties) + .or_replace(or_replace) + .if_not_exists(if_not_exists) + .external(true) + .file_format(file_format) + .location(location) + .build()) + } + + pub fn parse_file_format(&mut self) -> Result { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::AVRO => Ok(FileFormat::AVRO), + Keyword::JSONFILE => Ok(FileFormat::JSONFILE), + Keyword::ORC => Ok(FileFormat::ORC), + Keyword::PARQUET => Ok(FileFormat::PARQUET), + Keyword::RCFILE => Ok(FileFormat::RCFILE), + Keyword::SEQUENCEFILE => Ok(FileFormat::SEQUENCEFILE), + Keyword::TEXTFILE => Ok(FileFormat::TEXTFILE), + _ => self.expected("fileformat", next_token), + }, + _ => self.expected("fileformat", next_token), + } + } + + pub fn parse_analyze_format(&mut self) -> Result { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::TEXT => Ok(AnalyzeFormat::TEXT), + Keyword::GRAPHVIZ => Ok(AnalyzeFormat::GRAPHVIZ), + Keyword::JSON => Ok(AnalyzeFormat::JSON), + _ => self.expected("fileformat", next_token), + }, + _ => self.expected("fileformat", next_token), + } + } + + pub fn parse_create_view( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let materialized = self.parse_keyword(Keyword::MATERIALIZED); + self.expect_keyword(Keyword::VIEW)?; + let if_not_exists = dialect_of!(self is BigQueryDialect|SQLiteDialect|GenericDialect) + && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). + // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. + let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); + let name = self.parse_object_name(allow_unquoted_hyphen)?; + let columns = self.parse_view_columns()?; + let mut options = CreateTableOptions::None; + let with_options = self.parse_options(Keyword::WITH)?; + if !with_options.is_empty() { + options = CreateTableOptions::With(with_options); + } + + let cluster_by = if self.parse_keyword(Keyword::CLUSTER) { + self.expect_keyword(Keyword::BY)?; + self.parse_parenthesized_column_list(Optional, false)? + } else { + vec![] + }; + + if dialect_of!(self is BigQueryDialect | GenericDialect) { + if let Some(opts) = self.maybe_parse_options(Keyword::OPTIONS)? { + if !opts.is_empty() { + options = CreateTableOptions::Options(opts); + } + }; + } + + let to = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::TO) + { + Some(self.parse_object_name(false)?) 
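+ // Illustrative only: covers the ClickHouse target-table form, e.g. + // CREATE MATERIALIZED VIEW mv TO dest_table AS SELECT ...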
+ } else { + None + }; + + let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) + && self.parse_keyword(Keyword::COMMENT) + { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(str), + _ => self.expected("string literal", next_token)?, + } + } else { + None + }; + + self.expect_keyword(Keyword::AS)?; + let query = self.parse_query()?; + // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. + + let with_no_schema_binding = dialect_of!(self is RedshiftSqlDialect | GenericDialect) + && self.parse_keywords(&[ + Keyword::WITH, + Keyword::NO, + Keyword::SCHEMA, + Keyword::BINDING, + ]); + + Ok(Statement::CreateView { + name, + columns, + query, + materialized, + or_replace, + options, + cluster_by, + comment, + with_no_schema_binding, + if_not_exists, + temporary, + to, + }) + } + + pub fn parse_create_role(&mut self) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; + + let _ = self.parse_keyword(Keyword::WITH); // [ WITH ] + + let optional_keywords = if dialect_of!(self is MsSqlDialect) { + vec![Keyword::AUTHORIZATION] + } else if dialect_of!(self is PostgreSqlDialect) { + vec![ + Keyword::LOGIN, + Keyword::NOLOGIN, + Keyword::INHERIT, + Keyword::NOINHERIT, + Keyword::BYPASSRLS, + Keyword::NOBYPASSRLS, + Keyword::PASSWORD, + Keyword::CREATEDB, + Keyword::NOCREATEDB, + Keyword::CREATEROLE, + Keyword::NOCREATEROLE, + Keyword::SUPERUSER, + Keyword::NOSUPERUSER, + Keyword::REPLICATION, + Keyword::NOREPLICATION, + Keyword::CONNECTION, + Keyword::VALID, + Keyword::IN, + Keyword::ROLE, + Keyword::ADMIN, + Keyword::USER, + ] + } else { + vec![] + }; + + // MSSQL + let mut authorization_owner = None; + // Postgres + let mut login = None; + let mut inherit = None; + let mut bypassrls = None; + let mut password = None; + let mut create_db = None; + let mut create_role = None; + let mut superuser = None; + let mut replication = None; + let mut connection_limit = None; + let mut valid_until = None; + let mut in_role = vec![]; + let mut in_group = vec![]; + let mut role = vec![]; + let mut user = vec![]; + let mut admin = vec![]; + + while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) { + let loc = self + .tokens + .get(self.index - 1) + .map_or(Location { line: 0, column: 0 }, |t| t.span.start); + match keyword { + Keyword::AUTHORIZATION => { + if authorization_owner.is_some() { + parser_err!("Found multiple AUTHORIZATION", loc) + } else { + authorization_owner = Some(self.parse_object_name(false)?); + Ok(()) + } + } + Keyword::LOGIN | Keyword::NOLOGIN => { + if login.is_some() { + parser_err!("Found multiple LOGIN or NOLOGIN", loc) + } else { + login = Some(keyword == Keyword::LOGIN); + Ok(()) + } + } + Keyword::INHERIT | Keyword::NOINHERIT => { + if inherit.is_some() { + parser_err!("Found multiple INHERIT or NOINHERIT", loc) + } else { + inherit = Some(keyword == Keyword::INHERIT); + Ok(()) + } + } + Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => { + if bypassrls.is_some() { + parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc) + } else { + bypassrls = Some(keyword == Keyword::BYPASSRLS); + Ok(()) + } + } + Keyword::CREATEDB | Keyword::NOCREATEDB => { + if create_db.is_some() { + parser_err!("Found multiple CREATEDB or NOCREATEDB", loc) + } else { + create_db = Some(keyword == Keyword::CREATEDB); + Ok(()) + } + } + 
Keyword::CREATEROLE | Keyword::NOCREATEROLE => { + if create_role.is_some() { + parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc) + } else { + create_role = Some(keyword == Keyword::CREATEROLE); + Ok(()) + } + } + Keyword::SUPERUSER | Keyword::NOSUPERUSER => { + if superuser.is_some() { + parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc) + } else { + superuser = Some(keyword == Keyword::SUPERUSER); + Ok(()) + } + } + Keyword::REPLICATION | Keyword::NOREPLICATION => { + if replication.is_some() { + parser_err!("Found multiple REPLICATION or NOREPLICATION", loc) + } else { + replication = Some(keyword == Keyword::REPLICATION); + Ok(()) + } + } + Keyword::PASSWORD => { + if password.is_some() { + parser_err!("Found multiple PASSWORD", loc) + } else { + password = if self.parse_keyword(Keyword::NULL) { + Some(Password::NullPassword) + } else { + Some(Password::Password(Expr::Value(self.parse_value()?))) + }; + Ok(()) + } + } + Keyword::CONNECTION => { + self.expect_keyword(Keyword::LIMIT)?; + if connection_limit.is_some() { + parser_err!("Found multiple CONNECTION LIMIT", loc) + } else { + connection_limit = Some(Expr::Value(self.parse_number_value()?)); + Ok(()) + } + } + Keyword::VALID => { + self.expect_keyword(Keyword::UNTIL)?; + if valid_until.is_some() { + parser_err!("Found multiple VALID UNTIL", loc) + } else { + valid_until = Some(Expr::Value(self.parse_value()?)); + Ok(()) + } + } + Keyword::IN => { + if self.parse_keyword(Keyword::ROLE) { + if !in_role.is_empty() { + parser_err!("Found multiple IN ROLE", loc) + } else { + in_role = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } else if self.parse_keyword(Keyword::GROUP) { + if !in_group.is_empty() { + parser_err!("Found multiple IN GROUP", loc) + } else { + in_group = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } else { + self.expected("ROLE or GROUP after IN", self.peek_token()) + } + } + Keyword::ROLE => { + if !role.is_empty() { + parser_err!("Found multiple ROLE", loc) + } else { + role = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } + Keyword::USER => { + if !user.is_empty() { + parser_err!("Found multiple USER", loc) + } else { + user = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } + Keyword::ADMIN => { + if !admin.is_empty() { + parser_err!("Found multiple ADMIN", loc) + } else { + admin = self.parse_comma_separated(|p| p.parse_identifier(false))?; + Ok(()) + } + } + _ => break, + }? + } + + Ok(Statement::CreateRole { + names, + if_not_exists, + login, + inherit, + bypassrls, + password, + create_db, + create_role, + replication, + superuser, + connection_limit, + valid_until, + in_role, + in_group, + role, + user, + admin, + authorization_owner, + }) + } + + pub fn parse_owner(&mut self) -> Result { + let owner = match self.parse_one_of_keywords(&[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { + Some(Keyword::CURRENT_USER) => Owner::CurrentUser, + Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, + Some(Keyword::SESSION_USER) => Owner::SessionUser, + Some(_) => unreachable!(), + None => { + match self.parse_identifier(false) { + Ok(ident) => Owner::Ident(ident), + Err(e) => { + return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. 
{e}"))) + } + } + } + }; + Ok(owner) + } + + /// ```sql + /// CREATE POLICY name ON table_name [ AS { PERMISSIVE | RESTRICTIVE } ] + /// [ FOR { ALL | SELECT | INSERT | UPDATE | DELETE } ] + /// [ TO { role_name | PUBLIC | CURRENT_USER | CURRENT_ROLE | SESSION_USER } [, ...] ] + /// [ USING ( using_expression ) ] + /// [ WITH CHECK ( with_check_expression ) ] + /// ``` + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createpolicy.html) + pub fn parse_create_policy(&mut self) -> Result { + let name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + + let policy_type = if self.parse_keyword(Keyword::AS) { + let keyword = + self.expect_one_of_keywords(&[Keyword::PERMISSIVE, Keyword::RESTRICTIVE])?; + Some(match keyword { + Keyword::PERMISSIVE => CreatePolicyType::Permissive, + Keyword::RESTRICTIVE => CreatePolicyType::Restrictive, + _ => unreachable!(), + }) + } else { + None + }; + + let command = if self.parse_keyword(Keyword::FOR) { + let keyword = self.expect_one_of_keywords(&[ + Keyword::ALL, + Keyword::SELECT, + Keyword::INSERT, + Keyword::UPDATE, + Keyword::DELETE, + ])?; + Some(match keyword { + Keyword::ALL => CreatePolicyCommand::All, + Keyword::SELECT => CreatePolicyCommand::Select, + Keyword::INSERT => CreatePolicyCommand::Insert, + Keyword::UPDATE => CreatePolicyCommand::Update, + Keyword::DELETE => CreatePolicyCommand::Delete, + _ => unreachable!(), + }) + } else { + None + }; + + let to = if self.parse_keyword(Keyword::TO) { + Some(self.parse_comma_separated(|p| p.parse_owner())?) + } else { + None + }; + + let using = if self.parse_keyword(Keyword::USING) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(expr) + } else { + None + }; + + let with_check = if self.parse_keywords(&[Keyword::WITH, Keyword::CHECK]) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(expr) + } else { + None + }; + + Ok(CreatePolicy { + name, + table_name, + policy_type, + command, + to, + using, + with_check, + }) + } + + pub fn parse_create_index(&mut self, unique: bool) -> Result { + let concurrently = self.parse_keyword(Keyword::CONCURRENTLY); + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let index_name = if if_not_exists || !self.parse_keyword(Keyword::ON) { + let index_name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::ON)?; + Some(index_name) + } else { + None + }; + let table_name = self.parse_object_name(false)?; + let using = if self.parse_keyword(Keyword::USING) { + Some(self.parse_identifier(false)?) 
+ } else { + None + }; + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?; + self.expect_token(&Token::RParen)?; + + let include = if self.parse_keyword(Keyword::INCLUDE) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(|p| p.parse_identifier(false))?; + self.expect_token(&Token::RParen)?; + columns + } else { + vec![] + }; + + let nulls_distinct = if self.parse_keyword(Keyword::NULLS) { + let not = self.parse_keyword(Keyword::NOT); + self.expect_keyword(Keyword::DISTINCT)?; + Some(!not) + } else { + None + }; + + let with = if self.dialect.supports_create_index_with_clause() + && self.parse_keyword(Keyword::WITH) + { + self.expect_token(&Token::LParen)?; + let with_params = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + with_params + } else { + Vec::new() + }; + + let predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(Statement::CreateIndex(CreateIndex { + name: index_name, + table_name, + using, + columns, + unique, + concurrently, + if_not_exists, + include, + nulls_distinct, + with, + predicate, + })) + } + + pub fn parse_create_extension(&mut self) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + + let (schema, version, cascade) = if self.parse_keyword(Keyword::WITH) { + let schema = if self.parse_keyword(Keyword::SCHEMA) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + let version = if self.parse_keyword(Keyword::VERSION) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + let cascade = self.parse_keyword(Keyword::CASCADE); + + (schema, version, cascade) + } else { + (None, None, false) + }; + + Ok(Statement::CreateExtension { + name, + if_not_exists, + schema, + version, + cascade, + }) + } + + pub fn parse_create_table( + &mut self, + or_replace: bool, + temporary: bool, + global: Option, + transient: bool, + ) -> Result { + let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name(allow_unquoted_hyphen)?; + + // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs + let on_cluster = self.parse_optional_on_cluster()?; + + let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { + self.parse_object_name(allow_unquoted_hyphen).ok() + } else { + None + }; + + let clone = if self.parse_keyword(Keyword::CLONE) { + self.parse_object_name(allow_unquoted_hyphen).ok() + } else { + None + }; + + // parse optional column list (schema) + let (columns, constraints) = self.parse_columns()?; + let mut comment = if dialect_of!(self is HiveDialect) + && self.parse_keyword(Keyword::COMMENT) + { + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(str) => Some(CommentDef::AfterColumnDefsWithoutEq(str)), + _ => self.expected("comment", next_token)?, + } + } else { + None + }; + + // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` + let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); + + let hive_distribution = self.parse_hive_distribution()?; + let clustered_by = self.parse_optional_clustered_by()?; + let hive_formats = self.parse_hive_formats()?; + // PostgreSQL supports `WITH ( options )`, before `AS` + let with_options = 
self.parse_options(Keyword::WITH)?; + let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; + + let engine = if self.parse_keyword(Keyword::ENGINE) { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => { + let name = w.value; + let parameters = if self.peek_token() == Token::LParen { + Some(self.parse_parenthesized_identifiers()?) + } else { + None + }; + Some(TableEngine { name, parameters }) + } + _ => self.expected("identifier", next_token)?, + } + } else { + None + }; + + let auto_increment_offset = if self.parse_keyword(Keyword::AUTO_INCREMENT) { + let _ = self.consume_token(&Token::Eq); + let next_token = self.next_token(); + match next_token.token { + Token::Number(s, _) => Some(Self::parse::(s, next_token.span.start)?), + _ => self.expected("literal int", next_token)?, + } + } else { + None + }; + + // ClickHouse supports `PRIMARY KEY`, before `ORDER BY` + // https://clickhouse.com/docs/en/sql-reference/statements/create/table#primary-key + let primary_key = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + if self.consume_token(&Token::LParen) { + let columns = if self.peek_token() != Token::RParen { + self.parse_comma_separated(|p| p.parse_expr())? + } else { + vec![] + }; + self.expect_token(&Token::RParen)?; + Some(OneOrManyWithParens::Many(columns)) + } else { + Some(OneOrManyWithParens::One(self.parse_expr()?)) + } + } else { + None + }; + + let create_table_config = self.parse_optional_create_table_config()?; + + let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => Some(w.value), + _ => self.expected("identifier", next_token)?, + } + } else { + None + }; + + let collation = if self.parse_keywords(&[Keyword::COLLATE]) { + self.expect_token(&Token::Eq)?; + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => Some(w.value), + _ => self.expected("identifier", next_token)?, + } + } else { + None + }; + + let on_commit: Option = + if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DELETE, Keyword::ROWS]) + { + Some(OnCommit::DeleteRows) + } else if self.parse_keywords(&[ + Keyword::ON, + Keyword::COMMIT, + Keyword::PRESERVE, + Keyword::ROWS, + ]) { + Some(OnCommit::PreserveRows) + } else if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DROP]) { + Some(OnCommit::Drop) + } else { + None + }; + + let strict = self.parse_keyword(Keyword::STRICT); + + // Excludes Hive dialect here since it has been handled after table column definitions. + if !dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::COMMENT) { + // rewind the COMMENT keyword + self.prev_token(); + comment = self.parse_optional_inline_comment()? + }; + + // Parse optional `AS ( query )` + let query = if self.parse_keyword(Keyword::AS) { + Some(self.parse_query()?) + } else if self.dialect.supports_create_table_select() && self.parse_keyword(Keyword::SELECT) + { + // rewind the SELECT keyword + self.prev_token(); + Some(self.parse_query()?) 
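+ // Illustrative only: covers dialects such as MySQL where AS may be omitted, + // e.g. CREATE TABLE t2 SELECT * FROM t1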
+ } else { + None + }; + + Ok(CreateTableBuilder::new(table_name) + .temporary(temporary) + .columns(columns) + .constraints(constraints) + .with_options(with_options) + .table_properties(table_properties) + .or_replace(or_replace) + .if_not_exists(if_not_exists) + .transient(transient) + .hive_distribution(hive_distribution) + .hive_formats(Some(hive_formats)) + .global(global) + .query(query) + .without_rowid(without_rowid) + .like(like) + .clone_clause(clone) + .engine(engine) + .comment(comment) + .auto_increment_offset(auto_increment_offset) + .order_by(order_by) + .default_charset(default_charset) + .collation(collation) + .on_commit(on_commit) + .on_cluster(on_cluster) + .clustered_by(clustered_by) + .partition_by(create_table_config.partition_by) + .cluster_by(create_table_config.cluster_by) + .options(create_table_config.options) + .primary_key(primary_key) + .strict(strict) + .build()) + } + + /// Parse configuration like partitioning, clustering information during the table creation. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) + /// [PostgreSQL](https://www.postgresql.org/docs/current/ddl-partitioning.html) + fn parse_optional_create_table_config( + &mut self, + ) -> Result { + let partition_by = if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) + && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) + { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + + let mut cluster_by = None; + let mut options = None; + if dialect_of!(self is BigQueryDialect | GenericDialect) { + if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + cluster_by = Some(WrappedCollection::NoWrapping( + self.parse_comma_separated(|p| p.parse_identifier(false))?, + )); + }; + + if let Token::Word(word) = self.peek_token().token { + if word.keyword == Keyword::OPTIONS { + options = Some(self.parse_options(Keyword::OPTIONS)?); + } + }; + } + + Ok(CreateTableConfiguration { + partition_by, + cluster_by, + options, + }) + } + + pub fn parse_optional_clustered_by(&mut self) -> Result, ParserError> { + let clustered_by = if dialect_of!(self is HiveDialect|GenericDialect) + && self.parse_keywords(&[Keyword::CLUSTERED, Keyword::BY]) + { + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + + let sorted_by = if self.parse_keywords(&[Keyword::SORTED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let sorted_by_columns = self.parse_comma_separated(|p| p.parse_order_by_expr())?; + self.expect_token(&Token::RParen)?; + Some(sorted_by_columns) + } else { + None + }; + + self.expect_keyword(Keyword::INTO)?; + let num_buckets = self.parse_number_value()?; + self.expect_keyword(Keyword::BUCKETS)?; + Some(ClusteredBy { + columns, + sorted_by, + num_buckets, + }) + } else { + None + }; + Ok(clustered_by) + } + + pub fn parse_referential_action(&mut self) -> Result { + if self.parse_keyword(Keyword::RESTRICT) { + Ok(ReferentialAction::Restrict) + } else if self.parse_keyword(Keyword::CASCADE) { + Ok(ReferentialAction::Cascade) + } else if self.parse_keywords(&[Keyword::SET, Keyword::NULL]) { + Ok(ReferentialAction::SetNull) + } else if self.parse_keywords(&[Keyword::NO, Keyword::ACTION]) { + Ok(ReferentialAction::NoAction) + } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { + Ok(ReferentialAction::SetDefault) + } else { + self.expected( + "one of RESTRICT, CASCADE, SET NULL, NO ACTION or SET DEFAULT", + self.peek_token(), + ) + } + } + + pub fn 
parse_constraint_characteristics( + &mut self, + ) -> Result, ParserError> { + let mut cc = ConstraintCharacteristics::default(); + + loop { + if cc.deferrable.is_none() && self.parse_keywords(&[Keyword::NOT, Keyword::DEFERRABLE]) + { + cc.deferrable = Some(false); + } else if cc.deferrable.is_none() && self.parse_keyword(Keyword::DEFERRABLE) { + cc.deferrable = Some(true); + } else if cc.initially.is_none() && self.parse_keyword(Keyword::INITIALLY) { + if self.parse_keyword(Keyword::DEFERRED) { + cc.initially = Some(DeferrableInitial::Deferred); + } else if self.parse_keyword(Keyword::IMMEDIATE) { + cc.initially = Some(DeferrableInitial::Immediate); + } else { + self.expected("one of DEFERRED or IMMEDIATE", self.peek_token())?; + } + } else if cc.enforced.is_none() && self.parse_keyword(Keyword::ENFORCED) { + cc.enforced = Some(true); + } else if cc.enforced.is_none() + && self.parse_keywords(&[Keyword::NOT, Keyword::ENFORCED]) + { + cc.enforced = Some(false); + } else { + break; + } + } + + if cc.deferrable.is_some() || cc.initially.is_some() || cc.enforced.is_some() { + Ok(Some(cc)) + } else { + Ok(None) + } + } + + pub fn parse_optional_table_constraint( + &mut self, + ) -> Result, ParserError> { + let name = if self.parse_keyword(Keyword::CONSTRAINT) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) if w.keyword == Keyword::UNIQUE => { + let index_type_display = self.parse_index_type_display(); + if !dialect_of!(self is GenericDialect | MySqlDialect) + && !index_type_display.is_none() + { + return self + .expected("`index_name` or `(column_name [, ...])`", self.peek_token()); + } + + let nulls_distinct = self.parse_optional_nulls_distinct()?; + + // optional index name + let index_name = self.parse_optional_indent()?; + let index_type = self.parse_optional_using_then_index_type()?; + + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let index_options = self.parse_index_options()?; + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some(TableConstraint::Unique { + name, + index_name, + index_type_display, + index_type, + columns, + index_options, + characteristics, + nulls_distinct, + })) + } + Token::Word(w) if w.keyword == Keyword::PRIMARY => { + // after `PRIMARY` always stay `KEY` + self.expect_keyword(Keyword::KEY)?; + + // optional index name + let index_name = self.parse_optional_indent()?; + let index_type = self.parse_optional_using_then_index_type()?; + + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let index_options = self.parse_index_options()?; + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some(TableConstraint::PrimaryKey { + name, + index_name, + index_type, + columns, + index_options, + characteristics, + })) + } + Token::Word(w) if w.keyword == Keyword::FOREIGN => { + self.expect_keyword(Keyword::KEY)?; + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + self.expect_keyword(Keyword::REFERENCES)?; + let foreign_table = self.parse_object_name(false)?; + let referred_columns = self.parse_parenthesized_column_list(Mandatory, false)?; + let mut on_delete = None; + let mut on_update = None; + loop { + if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { + on_delete = Some(self.parse_referential_action()?); + } else if on_update.is_none() + && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) + { + on_update = 
Some(self.parse_referential_action()?); + } else { + break; + } + } + + let characteristics = self.parse_constraint_characteristics()?; + + Ok(Some(TableConstraint::ForeignKey { + name, + columns, + foreign_table, + referred_columns, + on_delete, + on_update, + characteristics, + })) + } + Token::Word(w) if w.keyword == Keyword::CHECK => { + self.expect_token(&Token::LParen)?; + let expr = Box::new(self.parse_expr()?); + self.expect_token(&Token::RParen)?; + Ok(Some(TableConstraint::Check { name, expr })) + } + Token::Word(w) + if (w.keyword == Keyword::INDEX || w.keyword == Keyword::KEY) + && dialect_of!(self is GenericDialect | MySqlDialect) + && name.is_none() => + { + let display_as_key = w.keyword == Keyword::KEY; + + let name = match self.peek_token().token { + Token::Word(word) if word.keyword == Keyword::USING => None, + _ => self.parse_optional_indent()?, + }; + + let index_type = self.parse_optional_using_then_index_type()?; + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + + Ok(Some(TableConstraint::Index { + display_as_key, + name, + index_type, + columns, + })) + } + Token::Word(w) + if (w.keyword == Keyword::FULLTEXT || w.keyword == Keyword::SPATIAL) + && dialect_of!(self is GenericDialect | MySqlDialect) => + { + if let Some(name) = name { + return self.expected( + "FULLTEXT or SPATIAL option without constraint name", + TokenWithSpan { + token: Token::make_keyword(&name.to_string()), + span: next_token.span, + }, + ); + } + + let fulltext = w.keyword == Keyword::FULLTEXT; + + let index_type_display = self.parse_index_type_display(); + + let opt_index_name = self.parse_optional_indent()?; + + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + + Ok(Some(TableConstraint::FulltextOrSpatial { + fulltext, + index_type_display, + opt_index_name, + columns, + })) + } + _ => { + if name.is_some() { + self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", next_token) + } else { + self.prev_token(); + Ok(None) + } + } + } + } + + fn parse_optional_nulls_distinct(&mut self) -> Result<NullsDistinctOption, ParserError> { + Ok(if self.parse_keyword(Keyword::NULLS) { + let not = self.parse_keyword(Keyword::NOT); + self.expect_keyword(Keyword::DISTINCT)?; + if not { + NullsDistinctOption::NotDistinct + } else { + NullsDistinctOption::Distinct + } + } else { + NullsDistinctOption::None + }) + } +} diff --git a/src/parser/deallocate.rs b/src/parser/deallocate.rs new file mode 100644 index 000000000..afdb297b9 --- /dev/null +++ b/src/parser/deallocate.rs @@ -0,0 +1,9 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_deallocate(&mut self) -> Result<Statement, ParserError> { + let prepare = self.parse_keyword(Keyword::PREPARE); + let name = self.parse_identifier(false)?; + Ok(Statement::Deallocate { name, prepare }) + } +} diff --git a/src/parser/declare.rs b/src/parser/declare.rs new file mode 100644 index 000000000..2c2980457 --- /dev/null +++ b/src/parser/declare.rs @@ -0,0 +1,76 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse a `DECLARE` statement. + /// + /// ```sql + /// DECLARE name [ BINARY ] [ ASENSITIVE | INSENSITIVE ] [ [ NO ] SCROLL ] + /// CURSOR [ { WITH | WITHOUT } HOLD ] FOR query + /// ``` + /// + /// The syntax can vary significantly between warehouses. See the + /// warehouse-specific parsing functions for the exact grammars.
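+ /// + /// For example, the ANSI/PostgreSQL cursor form handled below (illustrative): + /// + /// ```sql + /// DECLARE c NO SCROLL CURSOR WITH HOLD FOR SELECT * FROM accounts; + /// ```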
+ pub fn parse_declare(&mut self) -> Result<Statement, ParserError> { + if dialect_of!(self is BigQueryDialect) { + return self.parse_big_query_declare(); + } + if dialect_of!(self is SnowflakeDialect) { + return self.parse_snowflake_declare(); + } + if dialect_of!(self is MsSqlDialect) { + return self.parse_mssql_declare(); + } + + let name = self.parse_identifier(false)?; + + let binary = Some(self.parse_keyword(Keyword::BINARY)); + let sensitive = if self.parse_keyword(Keyword::INSENSITIVE) { + Some(true) + } else if self.parse_keyword(Keyword::ASENSITIVE) { + Some(false) + } else { + None + }; + let scroll = if self.parse_keyword(Keyword::SCROLL) { + Some(true) + } else if self.parse_keywords(&[Keyword::NO, Keyword::SCROLL]) { + Some(false) + } else { + None + }; + + self.expect_keyword(Keyword::CURSOR)?; + let declare_type = Some(DeclareType::Cursor); + + let hold = match self.parse_one_of_keywords(&[Keyword::WITH, Keyword::WITHOUT]) { + Some(keyword) => { + self.expect_keyword(Keyword::HOLD)?; + + match keyword { + Keyword::WITH => Some(true), + Keyword::WITHOUT => Some(false), + _ => unreachable!(), + } + } + None => None, + }; + + self.expect_keyword(Keyword::FOR)?; + + let query = Some(self.parse_query()?); + + Ok(Statement::Declare { + stmts: vec![Declare { + names: vec![name], + data_type: None, + assignment: None, + declare_type, + binary, + sensitive, + scroll, + hold, + for_query: query, + }], + }) + } +} diff --git a/src/parser/delete.rs b/src/parser/delete.rs new file mode 100644 index 000000000..5d2baf499 --- /dev/null +++ b/src/parser/delete.rs @@ -0,0 +1,60 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_delete(&mut self) -> Result<Statement, ParserError> { + let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { + // `FROM` keyword is optional in BigQuery SQL. + // https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement + if dialect_of!(self is BigQueryDialect | GenericDialect) { + (vec![], false) + } else { + let tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; + self.expect_keyword(Keyword::FROM)?; + (tables, true) + } + } else { + (vec![], true) + }; + + let from = self.parse_comma_separated(Parser::parse_table_and_joins)?; + let using = if self.parse_keyword(Keyword::USING) { + Some(self.parse_comma_separated(Parser::parse_table_and_joins)?) + } else { + None + }; + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + let returning = if self.parse_keyword(Keyword::RETURNING) { + Some(self.parse_comma_separated(Parser::parse_select_item)?) + } else { + None + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + let limit = if self.parse_keyword(Keyword::LIMIT) { + self.parse_limit()?
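+ // Illustrative only: e.g. the MySQL form + // DELETE FROM t ORDER BY id LIMIT 10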
+ } else { + None + }; + + Ok(Statement::Delete(Delete { + tables, + from: if with_from_keyword { + FromTable::WithFromKeyword(from) + } else { + FromTable::WithoutKeyword(from) + }, + using, + selection, + returning, + order_by, + limit, + })) + } +} diff --git a/src/parser/dialects/bigquery.rs b/src/parser/dialects/bigquery.rs new file mode 100644 index 000000000..37f4c452c --- /dev/null +++ b/src/parser/dialects/bigquery.rs @@ -0,0 +1,135 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse `CREATE FUNCTION` for [BigQuery] + /// + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement + pub fn parse_bigquery_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result<Statement, ParserError> { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let name = self.parse_object_name(false)?; + + let parse_function_param = + |parser: &mut Parser| -> Result<OperateFunctionArg, ParserError> { + let name = parser.parse_identifier(false)?; + let data_type = parser.parse_data_type()?; + Ok(OperateFunctionArg { + mode: None, + name: Some(name), + data_type, + default_expr: None, + }) + }; + self.expect_token(&Token::LParen)?; + let args = self.parse_comma_separated0(parse_function_param, Token::RParen)?; + self.expect_token(&Token::RParen)?; + + let return_type = if self.parse_keyword(Keyword::RETURNS) { + Some(self.parse_data_type()?) + } else { + None + }; + + let determinism_specifier = if self.parse_keyword(Keyword::DETERMINISTIC) { + Some(FunctionDeterminismSpecifier::Deterministic) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::DETERMINISTIC]) { + Some(FunctionDeterminismSpecifier::NotDeterministic) + } else { + None + }; + + let language = if self.parse_keyword(Keyword::LANGUAGE) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + let remote_connection = + if self.parse_keywords(&[Keyword::REMOTE, Keyword::WITH, Keyword::CONNECTION]) { + Some(self.parse_object_name(false)?) + } else { + None + }; + + // `OPTIONS` may come before or after the function body but + // may be specified at most once. + let mut options = self.maybe_parse_options(Keyword::OPTIONS)?; + + let function_body = if remote_connection.is_none() { + self.expect_keyword(Keyword::AS)?; + let expr = self.parse_expr()?; + if options.is_none() { + options = self.maybe_parse_options(Keyword::OPTIONS)?; + Some(CreateFunctionBody::AsBeforeOptions(expr)) + } else { + Some(CreateFunctionBody::AsAfterOptions(expr)) + } + } else { + None + }; + + Ok(Statement::CreateFunction(CreateFunction { + or_replace, + temporary, + if_not_exists, + name, + args: Some(args), + return_type, + function_body, + language, + determinism_specifier, + options, + remote_connection, + using: None, + behavior: None, + called_on_null: None, + parallel: None, + })) + } + + /// Parse a [BigQuery] `DECLARE` statement. + /// + /// Syntax: + /// ```text + /// DECLARE variable_name[, ...] [{ <variable_type> | <DEFAULT expression> }]; + /// ``` + /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#declare + pub fn parse_big_query_declare(&mut self) -> Result<Statement, ParserError> { + let names = self.parse_comma_separated(|parser| Parser::parse_identifier(parser, false))?; + + let data_type = match self.peek_token().token { + Token::Word(w) if w.keyword == Keyword::DEFAULT => None, + _ => Some(self.parse_data_type()?), + }; + + let expr = if data_type.is_some() { + if self.parse_keyword(Keyword::DEFAULT) { + Some(self.parse_expr()?)
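+ // Illustrative only: type and default both present, e.g. + // DECLARE x INT64 DEFAULT 42;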
+ } else { + None + } + } else { + // If no variable type - default expression must be specified, per BQ docs. + // i.e `DECLARE foo;` is invalid. + self.expect_keyword(Keyword::DEFAULT)?; + Some(self.parse_expr()?) + }; + + Ok(Statement::Declare { + stmts: vec![Declare { + names, + data_type, + assignment: expr.map(|expr| DeclareAssignment::Default(Box::new(expr))), + declare_type: None, + binary: None, + sensitive: None, + scroll: None, + hold: None, + for_query: None, + }], + }) + } +} diff --git a/src/parser/dialects/clickhouse.rs b/src/parser/dialects/clickhouse.rs new file mode 100644 index 000000000..5fb4a6a32 --- /dev/null +++ b/src/parser/dialects/clickhouse.rs @@ -0,0 +1,50 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse clickhouse [map] + /// + /// Syntax + /// + /// ```sql + /// Map(key_data_type, value_data_type) + /// ``` + /// + /// [map]: https://clickhouse.com/docs/en/sql-reference/data-types/map + pub(crate) fn parse_click_house_map_def( + &mut self, + ) -> Result<(DataType, DataType), ParserError> { + self.expect_keyword(Keyword::MAP)?; + self.expect_token(&Token::LParen)?; + let key_data_type = self.parse_data_type()?; + self.expect_token(&Token::Comma)?; + let value_data_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + + Ok((key_data_type, value_data_type)) + } + + /// Parse clickhouse [tuple] + /// + /// Syntax + /// + /// ```sql + /// Tuple([field_name] field_type, ...) + /// ``` + /// + /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + pub(crate) fn parse_click_house_tuple_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::TUPLE)?; + self.expect_token(&Token::LParen)?; + let mut field_defs = vec![]; + loop { + let (def, _) = self.parse_struct_field_def()?; + field_defs.push(def); + if !self.consume_token(&Token::Comma) { + break; + } + } + self.expect_token(&Token::RParen)?; + + Ok(field_defs) + } +} diff --git a/src/parser/dialects/duckdb.rs b/src/parser/dialects/duckdb.rs new file mode 100644 index 000000000..c6a642ee8 --- /dev/null +++ b/src/parser/dialects/duckdb.rs @@ -0,0 +1,226 @@ +use crate::parser::*; + +// DuckDB related parsing + +impl<'a> Parser<'a> { + pub fn parse_attach_duckdb_database_options( + &mut self, + ) -> Result, ParserError> { + if !self.consume_token(&Token::LParen) { + return Ok(vec![]); + } + + let mut options = vec![]; + loop { + if self.parse_keyword(Keyword::READ_ONLY) { + let boolean = if self.parse_keyword(Keyword::TRUE) { + Some(true) + } else if self.parse_keyword(Keyword::FALSE) { + Some(false) + } else { + None + }; + options.push(AttachDuckDBDatabaseOption::ReadOnly(boolean)); + } else if self.parse_keyword(Keyword::TYPE) { + let ident = self.parse_identifier(false)?; + options.push(AttachDuckDBDatabaseOption::Type(ident)); + } else { + return self.expected("expected one of: ), READ_ONLY, TYPE", self.peek_token()); + }; + + if self.consume_token(&Token::RParen) { + return Ok(options); + } else if self.consume_token(&Token::Comma) { + continue; + } else { + return self.expected("expected one of: ')', ','", self.peek_token()); + } + } + } + + pub fn parse_attach_duckdb_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let database_path = self.parse_identifier(false)?; + let database_alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier(false)?) 
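+ // Illustrative only: e.g. ATTACH DATABASE 'file.db' AS file_db (READ_ONLY)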
+ } else { + None + }; + + let attach_options = self.parse_attach_duckdb_database_options()?; + Ok(Statement::AttachDuckDBDatabase { + if_not_exists, + database, + database_path, + database_alias, + attach_options, + }) + } + + pub fn parse_detach_duckdb_database(&mut self) -> Result { + let database = self.parse_keyword(Keyword::DATABASE); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let database_alias = self.parse_identifier(false)?; + Ok(Statement::DetachDuckDBDatabase { + if_exists, + database, + database_alias, + }) + } + + /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. + pub fn parse_create_secret( + &mut self, + or_replace: bool, + temporary: bool, + persistent: bool, + ) -> Result { + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + + let mut storage_specifier = None; + let mut name = None; + if self.peek_token() != Token::LParen { + if self.parse_keyword(Keyword::IN) { + storage_specifier = self.parse_identifier(false).ok() + } else { + name = self.parse_identifier(false).ok(); + } + + // Storage specifier may follow the name + if storage_specifier.is_none() + && self.peek_token() != Token::LParen + && self.parse_keyword(Keyword::IN) + { + storage_specifier = self.parse_identifier(false).ok(); + } + } + + self.expect_token(&Token::LParen)?; + self.expect_keyword(Keyword::TYPE)?; + let secret_type = self.parse_identifier(false)?; + + let mut options = Vec::new(); + if self.consume_token(&Token::Comma) { + options.append(&mut self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + let value = p.parse_identifier(false)?; + Ok(SecretOption { key, value }) + })?); + } + self.expect_token(&Token::RParen)?; + + let temp = match (temporary, persistent) { + (true, false) => Some(true), + (false, true) => Some(false), + (false, false) => None, + _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, + }; + + Ok(Statement::CreateSecret { + or_replace, + temporary: temp, + if_not_exists, + name, + storage_specifier, + secret_type, + options, + }) + } + + /// Parse a field for a duckdb [dictionary] + /// + /// Syntax + /// + /// ```sql + /// 'name': expr + /// ``` + /// + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + pub(crate) fn parse_duckdb_dictionary_field(&mut self) -> Result { + let key = self.parse_identifier(false)?; + + self.expect_token(&Token::Colon)?; + + let expr = self.parse_expr()?; + + Ok(DictionaryField { + key, + value: Box::new(expr), + }) + } + + /// Parse a field for a duckdb [map] + /// + /// Syntax + /// + /// ```sql + /// key: value + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + pub(crate) fn parse_duckdb_map_field(&mut self) -> Result { + let key = self.parse_expr()?; + + self.expect_token(&Token::Colon)?; + + let value = self.parse_expr()?; + + Ok(MapEntry { + key: Box::new(key), + value: Box::new(value), + }) + } + + /// DuckDB specific: Parse a duckdb [map] + /// + /// Syntax: + /// + /// ```sql + /// Map {key1: value1[, ... 
]} + /// ``` + /// + /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps + pub(crate) fn parse_duckdb_map_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + let fields = self.parse_comma_separated0(Self::parse_duckdb_map_field, Token::RBrace)?; + self.expect_token(&Token::RBrace)?; + Ok(Expr::Map(Map { entries: fields })) + } + + /// DuckDB specific: Parse a duckdb [dictionary] + /// + /// Syntax: + /// + /// ```sql + /// {'field_name': expr1[, ... ]} + /// ``` + /// + /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs + pub(crate) fn parse_duckdb_struct_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + + let fields = self.parse_comma_separated(Self::parse_duckdb_dictionary_field)?; + + self.expect_token(&Token::RBrace)?; + + Ok(Expr::Dictionary(fields)) + } + + /// Duckdb Struct Data Type + pub(crate) fn parse_duckdb_struct_type_def(&mut self) -> Result, ParserError> { + self.expect_keyword(Keyword::STRUCT)?; + self.expect_token(&Token::LParen)?; + let struct_body = self.parse_comma_separated(|parser| { + let field_name = parser.parse_identifier(false)?; + let field_type = parser.parse_data_type()?; + + Ok(StructField { + field_name: Some(field_name), + field_type, + }) + }); + self.expect_token(&Token::RParen)?; + struct_body + } +} diff --git a/src/parser/dialects/hive.rs b/src/parser/dialects/hive.rs new file mode 100644 index 000000000..f8e8bc651 --- /dev/null +++ b/src/parser/dialects/hive.rs @@ -0,0 +1,192 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse `CREATE FUNCTION` for [Hive] + /// + /// [Hive]: https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction + pub fn parse_hive_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::AS)?; + + let as_ = self.parse_create_function_body_string()?; + let using = self.parse_optional_create_function_using()?; + + Ok(Statement::CreateFunction(CreateFunction { + or_replace, + temporary, + name, + function_body: Some(CreateFunctionBody::AsBeforeOptions(as_)), + using, + if_not_exists: false, + args: None, + return_type: None, + behavior: None, + called_on_null: None, + parallel: None, + language: None, + determinism_specifier: None, + options: None, + remote_connection: None, + })) + } + + pub fn parse_hive_formats(&mut self) -> Result { + let mut hive_format = HiveFormat::default(); + loop { + match self.parse_one_of_keywords(&[ + Keyword::ROW, + Keyword::STORED, + Keyword::LOCATION, + Keyword::WITH, + ]) { + Some(Keyword::ROW) => { + hive_format.row_format = Some(self.parse_row_format()?); + } + Some(Keyword::STORED) => { + self.expect_keyword(Keyword::AS)?; + if self.parse_keyword(Keyword::INPUTFORMAT) { + let input_format = self.parse_expr()?; + self.expect_keyword(Keyword::OUTPUTFORMAT)?; + let output_format = self.parse_expr()?; + hive_format.storage = Some(HiveIOFormat::IOF { + input_format, + output_format, + }); + } else { + let format = self.parse_file_format()?; + hive_format.storage = Some(HiveIOFormat::FileFormat { format }); + } + } + Some(Keyword::LOCATION) => { + hive_format.location = Some(self.parse_literal_string()?); + } + Some(Keyword::WITH) => { + self.prev_token(); + let properties = self + .parse_options_with_keywords(&[Keyword::WITH, Keyword::SERDEPROPERTIES])?; + if !properties.is_empty() { + hive_format.serde_properties = Some(properties); + } else { + 
break; + } + } + None => break, + _ => break, + } + } + + Ok(hive_format) + } + + pub fn parse_row_format(&mut self) -> Result { + self.expect_keyword(Keyword::FORMAT)?; + match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { + Some(Keyword::SERDE) => { + let class = self.parse_literal_string()?; + Ok(HiveRowFormat::SERDE { class }) + } + _ => { + let mut row_delimiters = vec![]; + + loop { + match self.parse_one_of_keywords(&[ + Keyword::FIELDS, + Keyword::COLLECTION, + Keyword::MAP, + Keyword::LINES, + Keyword::NULL, + ]) { + Some(Keyword::FIELDS) => { + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::FieldsTerminatedBy, + char: self.parse_identifier(false)?, + }); + + if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::FieldsEscapedBy, + char: self.parse_identifier(false)?, + }); + } + } else { + break; + } + } + Some(Keyword::COLLECTION) => { + if self.parse_keywords(&[ + Keyword::ITEMS, + Keyword::TERMINATED, + Keyword::BY, + ]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::CollectionItemsTerminatedBy, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + Some(Keyword::MAP) => { + if self.parse_keywords(&[ + Keyword::KEYS, + Keyword::TERMINATED, + Keyword::BY, + ]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::MapKeysTerminatedBy, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + Some(Keyword::LINES) => { + if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::LinesTerminatedBy, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + Some(Keyword::NULL) => { + if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) { + row_delimiters.push(HiveRowDelimiter { + delimiter: HiveDelimiter::NullDefinedAs, + char: self.parse_identifier(false)?, + }); + } else { + break; + } + } + _ => { + break; + } + } + } + + Ok(HiveRowFormat::DELIMITED { + delimiters: row_delimiters, + }) + } + } + } + + //TODO: Implement parsing for Skewed + pub fn parse_hive_distribution(&mut self) -> Result { + if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(HiveDistributionStyle::PARTITIONED { columns }) + } else { + Ok(HiveDistributionStyle::NONE) + } + } +} diff --git a/src/parser/dialects/mod.rs b/src/parser/dialects/mod.rs new file mode 100644 index 000000000..47d25f66d --- /dev/null +++ b/src/parser/dialects/mod.rs @@ -0,0 +1,9 @@ +mod bigquery; +mod clickhouse; +mod duckdb; +mod hive; +mod mssql; +mod postgresql; +mod snowflake; +mod sqlite; +mod utils; diff --git a/src/parser/dialects/mssql.rs b/src/parser/dialects/mssql.rs new file mode 100644 index 000000000..45a32f236 --- /dev/null +++ b/src/parser/dialects/mssql.rs @@ -0,0 +1,242 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse a [MsSql] `DECLARE` statement. 
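+ /// For example, inputs this parser is meant to accept (illustrative samples, not taken from the upstream docs): + /// ```sql + /// DECLARE @foo INT = 7; + /// DECLARE @bar CURSOR, @baz NVARCHAR(50); + /// ```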
+ /// + /// Syntax: + /// ```text + /// DECLARE + /// { + /// { @local_variable [AS] data_type [ = value ] } + /// | { @cursor_variable_name CURSOR } + /// } [ ,...n ] + /// ``` + /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16 + pub fn parse_mssql_declare(&mut self) -> Result<Statement, ParserError> { + let stmts = self.parse_comma_separated(Parser::parse_mssql_declare_stmt)?; + + Ok(Statement::Declare { stmts }) + } + + /// Parse the body of a [MsSql] `DECLARE` statement. + /// + /// Syntax: + /// ```text + /// { + /// { @local_variable [AS] data_type [ = value ] } + /// | { @cursor_variable_name CURSOR } + /// } [ ,...n ] + /// ``` + /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16 + pub fn parse_mssql_declare_stmt(&mut self) -> Result<Declare, ParserError> { + let name = { + let ident = self.parse_identifier(false)?; + if !ident.value.starts_with('@') { + Err(ParserError::TokenizerError( + "Invalid MsSql variable declaration.".to_string(), + )) + } else { + Ok(ident) + } + }?; + + let (declare_type, data_type) = match self.peek_token().token { + Token::Word(w) => match w.keyword { + Keyword::CURSOR => { + self.next_token(); + (Some(DeclareType::Cursor), None) + } + Keyword::AS => { + self.next_token(); + (None, Some(self.parse_data_type()?)) + } + _ => (None, Some(self.parse_data_type()?)), + }, + _ => (None, Some(self.parse_data_type()?)), + }; + + let assignment = self.parse_mssql_variable_declaration_expression()?; + + Ok(Declare { + names: vec![name], + data_type, + assignment, + declare_type, + binary: None, + sensitive: None, + scroll: None, + hold: None, + for_query: None, + }) + } + + /// Parses the assigned expression in a variable declaration.
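+ /// For example (illustrative), given `DECLARE @x INT = 42;` this parses the trailing `= 42` into a `DeclareAssignment::MsSqlAssignment`.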
+ /// + /// Syntax: + /// ```text + /// [ = ] + /// ``` + pub fn parse_mssql_variable_declaration_expression( + &mut self, + ) -> Result, ParserError> { + Ok(match self.peek_token().token { + Token::Eq => { + self.next_token(); // Skip `=` + Some(DeclareAssignment::MsSqlAssignment(Box::new( + self.parse_expr()?, + ))) + } + _ => None, + }) + } + + pub(crate) fn parse_mssql_alter_role(&mut self) -> Result { + let role_name = self.parse_identifier(false)?; + + let operation = if self.parse_keywords(&[Keyword::ADD, Keyword::MEMBER]) { + let member_name = self.parse_identifier(false)?; + AlterRoleOperation::AddMember { member_name } + } else if self.parse_keywords(&[Keyword::DROP, Keyword::MEMBER]) { + let member_name = self.parse_identifier(false)?; + AlterRoleOperation::DropMember { member_name } + } else if self.parse_keywords(&[Keyword::WITH, Keyword::NAME]) { + if self.consume_token(&Token::Eq) { + let role_name = self.parse_identifier(false)?; + AlterRoleOperation::RenameRole { role_name } + } else { + return self.expected("= after WITH NAME ", self.peek_token()); + } + } else { + return self.expected("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token()); + }; + + Ok(Statement::AlterRole { + name: role_name, + operation, + }) + } + + /// Parse a mssql `FOR [XML | JSON | BROWSE]` clause + pub fn parse_for_clause(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::XML) { + Ok(Some(self.parse_for_xml()?)) + } else if self.parse_keyword(Keyword::JSON) { + Ok(Some(self.parse_for_json()?)) + } else if self.parse_keyword(Keyword::BROWSE) { + Ok(Some(ForClause::Browse)) + } else { + Ok(None) + } + } + + /// Parse a mssql `FOR XML` clause + pub fn parse_for_xml(&mut self) -> Result { + let for_xml = if self.parse_keyword(Keyword::RAW) { + let mut element_name = None; + if self.peek_token().token == Token::LParen { + self.expect_token(&Token::LParen)?; + element_name = Some(self.parse_literal_string()?); + self.expect_token(&Token::RParen)?; + } + ForXml::Raw(element_name) + } else if self.parse_keyword(Keyword::AUTO) { + ForXml::Auto + } else if self.parse_keyword(Keyword::EXPLICIT) { + ForXml::Explicit + } else if self.parse_keyword(Keyword::PATH) { + let mut element_name = None; + if self.peek_token().token == Token::LParen { + self.expect_token(&Token::LParen)?; + element_name = Some(self.parse_literal_string()?); + self.expect_token(&Token::RParen)?; + } + ForXml::Path(element_name) + } else { + return Err(ParserError::ParserError( + "Expected FOR XML [RAW | AUTO | EXPLICIT | PATH ]".to_string(), + )); + }; + let mut elements = false; + let mut binary_base64 = false; + let mut root = None; + let mut r#type = false; + while self.peek_token().token == Token::Comma { + self.next_token(); + if self.parse_keyword(Keyword::ELEMENTS) { + elements = true; + } else if self.parse_keyword(Keyword::BINARY) { + self.expect_keyword(Keyword::BASE64)?; + binary_base64 = true; + } else if self.parse_keyword(Keyword::ROOT) { + self.expect_token(&Token::LParen)?; + root = Some(self.parse_literal_string()?); + self.expect_token(&Token::RParen)?; + } else if self.parse_keyword(Keyword::TYPE) { + r#type = true; + } + } + Ok(ForClause::Xml { + for_xml, + elements, + binary_base64, + root, + r#type, + }) + } + + /// Parse a mssql `FOR JSON` clause + pub fn parse_for_json(&mut self) -> Result { + let for_json = if self.parse_keyword(Keyword::AUTO) { + ForJson::Auto + } else if self.parse_keyword(Keyword::PATH) { + ForJson::Path + } else { + return Err(ParserError::ParserError( + "Expected FOR JSON 
[AUTO | PATH ]".to_string(), + )); + }; + let mut root = None; + let mut include_null_values = false; + let mut without_array_wrapper = false; + while self.peek_token().token == Token::Comma { + self.next_token(); + if self.parse_keyword(Keyword::ROOT) { + self.expect_token(&Token::LParen)?; + root = Some(self.parse_literal_string()?); + self.expect_token(&Token::RParen)?; + } else if self.parse_keyword(Keyword::INCLUDE_NULL_VALUES) { + include_null_values = true; + } else if self.parse_keyword(Keyword::WITHOUT_ARRAY_WRAPPER) { + without_array_wrapper = true; + } + } + Ok(ForClause::Json { + for_json, + root, + include_null_values, + without_array_wrapper, + }) + } + + /// mssql-like convert function + pub(crate) fn parse_mssql_convert(&mut self, is_try: bool) -> Result { + self.expect_token(&Token::LParen)?; + let data_type = self.parse_data_type()?; + self.expect_token(&Token::Comma)?; + let expr = self.parse_expr()?; + let styles = if self.consume_token(&Token::Comma) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + Default::default() + }; + self.expect_token(&Token::RParen)?; + Ok(Expr::Convert { + is_try, + expr: Box::new(expr), + data_type: Some(data_type), + charset: None, + target_before_value: true, + styles, + }) + } +} diff --git a/src/parser/dialects/postgresql.rs b/src/parser/dialects/postgresql.rs new file mode 100644 index 000000000..d4f952a03 --- /dev/null +++ b/src/parser/dialects/postgresql.rs @@ -0,0 +1,280 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse `CREATE FUNCTION` for [Postgres] + /// + /// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html + pub fn parse_postgres_create_function( + &mut self, + or_replace: bool, + temporary: bool, + ) -> Result { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::LParen)?; + let args = if self.consume_token(&Token::RParen) { + self.prev_token(); + None + } else { + Some(self.parse_comma_separated(Parser::parse_function_arg)?) + }; + + self.expect_token(&Token::RParen)?; + + let return_type = if self.parse_keyword(Keyword::RETURNS) { + Some(self.parse_data_type()?) 
+ } else { + None + }; + + #[derive(Default)] + struct Body { + language: Option, + behavior: Option, + function_body: Option, + called_on_null: Option, + parallel: Option, + } + let mut body = Body::default(); + loop { + fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { + if field.is_some() { + return Err(ParserError::ParserError(format!( + "{name} specified more than once", + ))); + } + Ok(()) + } + if self.parse_keyword(Keyword::AS) { + ensure_not_set(&body.function_body, "AS")?; + body.function_body = Some(CreateFunctionBody::AsBeforeOptions( + self.parse_create_function_body_string()?, + )); + } else if self.parse_keyword(Keyword::LANGUAGE) { + ensure_not_set(&body.language, "LANGUAGE")?; + body.language = Some(self.parse_identifier(false)?); + } else if self.parse_keyword(Keyword::IMMUTABLE) { + ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; + body.behavior = Some(FunctionBehavior::Immutable); + } else if self.parse_keyword(Keyword::STABLE) { + ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; + body.behavior = Some(FunctionBehavior::Stable); + } else if self.parse_keyword(Keyword::VOLATILE) { + ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; + body.behavior = Some(FunctionBehavior::Volatile); + } else if self.parse_keywords(&[ + Keyword::CALLED, + Keyword::ON, + Keyword::NULL, + Keyword::INPUT, + ]) { + ensure_not_set( + &body.called_on_null, + "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", + )?; + body.called_on_null = Some(FunctionCalledOnNull::CalledOnNullInput); + } else if self.parse_keywords(&[ + Keyword::RETURNS, + Keyword::NULL, + Keyword::ON, + Keyword::NULL, + Keyword::INPUT, + ]) { + ensure_not_set( + &body.called_on_null, + "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", + )?; + body.called_on_null = Some(FunctionCalledOnNull::ReturnsNullOnNullInput); + } else if self.parse_keyword(Keyword::STRICT) { + ensure_not_set( + &body.called_on_null, + "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", + )?; + body.called_on_null = Some(FunctionCalledOnNull::Strict); + } else if self.parse_keyword(Keyword::PARALLEL) { + ensure_not_set(&body.parallel, "PARALLEL { UNSAFE | RESTRICTED | SAFE }")?; + if self.parse_keyword(Keyword::UNSAFE) { + body.parallel = Some(FunctionParallel::Unsafe); + } else if self.parse_keyword(Keyword::RESTRICTED) { + body.parallel = Some(FunctionParallel::Restricted); + } else if self.parse_keyword(Keyword::SAFE) { + body.parallel = Some(FunctionParallel::Safe); + } else { + return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); + } + } else if self.parse_keyword(Keyword::RETURN) { + ensure_not_set(&body.function_body, "RETURN")?; + body.function_body = Some(CreateFunctionBody::Return(self.parse_expr()?)); + } else { + break; + } + } + + Ok(Statement::CreateFunction(CreateFunction { + or_replace, + temporary, + name, + args, + return_type, + behavior: body.behavior, + called_on_null: body.called_on_null, + parallel: body.parallel, + language: body.language, + function_body: body.function_body, + if_not_exists: false, + using: None, + determinism_specifier: None, + options: None, + remote_connection: None, + })) + } + + /// Parse a postgresql casting style which is in the form of `expr::datatype`. 
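+ /// For example (illustrative): `'2024-01-01'::DATE` or `total::NUMERIC(10, 2)`; the `expr` argument is the already-parsed left-hand side.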
+ pub fn parse_pg_cast(&mut self, expr: Expr) -> Result { + Ok(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(expr), + data_type: self.parse_data_type()?, + format: None, + }) + } + + pub(crate) fn parse_pg_alter_role(&mut self) -> Result { + let role_name = self.parse_identifier(false)?; + + // [ IN DATABASE _`database_name`_ ] + let in_database = if self.parse_keywords(&[Keyword::IN, Keyword::DATABASE]) { + self.parse_object_name(false).ok() + } else { + None + }; + + let operation = if self.parse_keyword(Keyword::RENAME) { + if self.parse_keyword(Keyword::TO) { + let role_name = self.parse_identifier(false)?; + AlterRoleOperation::RenameRole { role_name } + } else { + return self.expected("TO after RENAME", self.peek_token()); + } + // SET + } else if self.parse_keyword(Keyword::SET) { + let config_name = self.parse_object_name(false)?; + // FROM CURRENT + if self.parse_keywords(&[Keyword::FROM, Keyword::CURRENT]) { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::FromCurrent, + in_database, + } + // { TO | = } { value | DEFAULT } + } else if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + if self.parse_keyword(Keyword::DEFAULT) { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::Default, + in_database, + } + } else if let Ok(expr) = self.parse_expr() { + AlterRoleOperation::Set { + config_name, + config_value: SetConfigValue::Value(expr), + in_database, + } + } else { + self.expected("config value", self.peek_token())? + } + } else { + self.expected("'TO' or '=' or 'FROM CURRENT'", self.peek_token())? + } + // RESET + } else if self.parse_keyword(Keyword::RESET) { + if self.parse_keyword(Keyword::ALL) { + AlterRoleOperation::Reset { + config_name: ResetConfig::ALL, + in_database, + } + } else { + let config_name = self.parse_object_name(false)?; + AlterRoleOperation::Reset { + config_name: ResetConfig::ConfigName(config_name), + in_database, + } + } + // option + } else { + // [ WITH ] + let _ = self.parse_keyword(Keyword::WITH); + // option + let mut options = vec![]; + while let Some(opt) = self.maybe_parse(|parser| parser.parse_pg_role_option())? 
{ + options.push(opt); + } + // check option + if options.is_empty() { + return self.expected("option", self.peek_token())?; + } + + AlterRoleOperation::WithOptions { options } + }; + + Ok(Statement::AlterRole { + name: role_name, + operation, + }) + } + + pub(crate) fn parse_pg_role_option(&mut self) -> Result<RoleOption, ParserError> { + let option = match self.parse_one_of_keywords(&[ + Keyword::BYPASSRLS, + Keyword::NOBYPASSRLS, + Keyword::CONNECTION, + Keyword::CREATEDB, + Keyword::NOCREATEDB, + Keyword::CREATEROLE, + Keyword::NOCREATEROLE, + Keyword::INHERIT, + Keyword::NOINHERIT, + Keyword::LOGIN, + Keyword::NOLOGIN, + Keyword::PASSWORD, + Keyword::REPLICATION, + Keyword::NOREPLICATION, + Keyword::SUPERUSER, + Keyword::NOSUPERUSER, + Keyword::VALID, + ]) { + Some(Keyword::BYPASSRLS) => RoleOption::BypassRLS(true), + Some(Keyword::NOBYPASSRLS) => RoleOption::BypassRLS(false), + Some(Keyword::CONNECTION) => { + self.expect_keyword(Keyword::LIMIT)?; + RoleOption::ConnectionLimit(Expr::Value(self.parse_number_value()?)) + } + Some(Keyword::CREATEDB) => RoleOption::CreateDB(true), + Some(Keyword::NOCREATEDB) => RoleOption::CreateDB(false), + Some(Keyword::CREATEROLE) => RoleOption::CreateRole(true), + Some(Keyword::NOCREATEROLE) => RoleOption::CreateRole(false), + Some(Keyword::INHERIT) => RoleOption::Inherit(true), + Some(Keyword::NOINHERIT) => RoleOption::Inherit(false), + Some(Keyword::LOGIN) => RoleOption::Login(true), + Some(Keyword::NOLOGIN) => RoleOption::Login(false), + Some(Keyword::PASSWORD) => { + let password = if self.parse_keyword(Keyword::NULL) { + Password::NullPassword + } else { + Password::Password(Expr::Value(self.parse_value()?)) + }; + RoleOption::Password(password) + } + Some(Keyword::REPLICATION) => RoleOption::Replication(true), + Some(Keyword::NOREPLICATION) => RoleOption::Replication(false), + Some(Keyword::SUPERUSER) => RoleOption::SuperUser(true), + Some(Keyword::NOSUPERUSER) => RoleOption::SuperUser(false), + Some(Keyword::VALID) => { + self.expect_keyword(Keyword::UNTIL)?; + RoleOption::ValidUntil(Expr::Value(self.parse_value()?)) + } + _ => self.expected("option", self.peek_token())?, + }; + + Ok(option) + } +} diff --git a/src/parser/dialects/snowflake.rs b/src/parser/dialects/snowflake.rs new file mode 100644 index 000000000..6f7ed5f1a --- /dev/null +++ b/src/parser/dialects/snowflake.rs @@ -0,0 +1,145 @@ +use crate::keywords::ALL_KEYWORDS; +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse a [Snowflake] `DECLARE` statement. + /// + /// Syntax: + /// ```text + /// DECLARE + /// [{ <variable_declaration> + /// | <cursor_declaration> + /// | <resultset_declaration> + /// | <exception_declaration> }; ... ] + /// + /// <variable_declaration> + /// <variable_name> [<type>] [ { DEFAULT | := } <expression> ] + /// + /// <cursor_declaration> + /// <cursor_name> CURSOR FOR <query> + /// + /// <resultset_declaration> + /// <resultset_name> RESULTSET [ { DEFAULT | := } ( <query> ) ] ; + /// + /// <exception_declaration> + /// <exception_name> EXCEPTION [ ( <exception_number> , '<exception_message>' ) ] ; + /// ``` + /// + /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/snowflake-scripting/declare + pub fn parse_snowflake_declare(&mut self) -> Result<Statement, ParserError> { + let mut stmts = vec![]; + loop { + let name = self.parse_identifier(false)?; + let (declare_type, for_query, assigned_expr, data_type) = + if self.parse_keyword(Keyword::CURSOR) { + self.expect_keyword(Keyword::FOR)?; + match self.peek_token().token { + Token::Word(w) if w.keyword == Keyword::SELECT => ( + Some(DeclareType::Cursor), + Some(self.parse_query()?), + None, + None, + ), + _ => ( + Some(DeclareType::Cursor), + None, + Some(DeclareAssignment::For(Box::new(self.parse_expr()?))), + None, + ), + } + } else if self.parse_keyword(Keyword::RESULTSET) { + let assigned_expr = if self.peek_token().token != Token::SemiColon { + self.parse_snowflake_variable_declaration_expression()? + } else { + // Nothing more to do. The statement has no further parameters. + None + }; + + (Some(DeclareType::ResultSet), None, assigned_expr, None) + } else if self.parse_keyword(Keyword::EXCEPTION) { + let assigned_expr = if self.peek_token().token == Token::LParen { + Some(DeclareAssignment::Expr(Box::new(self.parse_expr()?))) + } else { + // Nothing more to do. The statement has no further parameters. + None + }; + + (Some(DeclareType::Exception), None, assigned_expr, None) + } else { + // Without an explicit keyword, the only valid option is variable declaration. + let (assigned_expr, data_type) = if let Some(assigned_expr) = + self.parse_snowflake_variable_declaration_expression()? + { + (Some(assigned_expr), None) + } else if let Token::Word(_) = self.peek_token().token { + let data_type = self.parse_data_type()?; + ( + self.parse_snowflake_variable_declaration_expression()?, + Some(data_type), + ) + } else { + (None, None) + }; + (None, None, assigned_expr, data_type) + }; + let stmt = Declare { + names: vec![name], + data_type, + assignment: assigned_expr, + declare_type, + binary: None, + sensitive: None, + scroll: None, + hold: None, + for_query, + }; + + stmts.push(stmt); + if self.consume_token(&Token::SemiColon) { + match self.peek_token().token { + Token::Word(w) + if ALL_KEYWORDS + .binary_search(&w.value.to_uppercase().as_str()) + .is_err() => + { + // Not a keyword - start of a new declaration. + continue; + } + _ => { + // Put back the semicolon, this is the end of the DECLARE statement. + self.prev_token(); + } + } + } + + break; + } + + Ok(Statement::Declare { stmts }) + } + + /// Parses the assigned expression in a variable declaration.
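+ /// For example (illustrative), this parses the `DEFAULT 42` in `DECLARE x INT DEFAULT 42;` and the `:= 42` in `DECLARE y INT := 42;`.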
+ /// + /// Syntax: + /// ```text + /// [ { DEFAULT | := } <expression> ] + /// ``` + /// + pub fn parse_snowflake_variable_declaration_expression( + &mut self, + ) -> Result<Option<DeclareAssignment>, ParserError> { + Ok(match self.peek_token().token { + Token::Word(w) if w.keyword == Keyword::DEFAULT => { + self.next_token(); // Skip `DEFAULT` + Some(DeclareAssignment::Default(Box::new(self.parse_expr()?))) + } + Token::Assignment => { + self.next_token(); // Skip `:=` + Some(DeclareAssignment::DuckAssignment(Box::new( + self.parse_expr()?, + ))) + } + _ => None, + }) + } +} diff --git a/src/parser/dialects/sqlite.rs b/src/parser/dialects/sqlite.rs new file mode 100644 index 000000000..c84254973 --- /dev/null +++ b/src/parser/dialects/sqlite.rs @@ -0,0 +1,23 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// SQLite-specific `CREATE VIRTUAL TABLE` + pub fn parse_create_virtual_table(&mut self) -> Result<Statement, ParserError> { + self.expect_keyword(Keyword::TABLE)?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::USING)?; + let module_name = self.parse_identifier(false)?; + // SQLite docs note that module "arguments syntax is sufficiently + // general that the arguments can be made to appear as column + // definitions in a traditional CREATE TABLE statement", but + // we don't implement that. + let module_args = self.parse_parenthesized_column_list(Optional, false)?; + Ok(Statement::CreateVirtualTable { + name: table_name, + if_not_exists, + module_name, + module_args, + }) + } +} diff --git a/src/parser/dialects/utils.rs b/src/parser/dialects/utils.rs new file mode 100644 index 000000000..ec656a164 --- /dev/null +++ b/src/parser/dialects/utils.rs @@ -0,0 +1,19 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse the body of a `CREATE FUNCTION` specified as a string. + /// e.g. `CREATE FUNCTION ... AS $$ body $$`.
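+ /// For example (illustrative), in `CREATE FUNCTION f() RETURNS INT LANGUAGE SQL AS $$ SELECT 1 $$` this helper parses only the dollar-quoted `$$ SELECT 1 $$` body; dialects without dollar quoting fall back to a single-quoted string body.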
+ pub(crate) fn parse_create_function_body_string(&mut self) -> Result { + let peek_token = self.peek_token(); + match peek_token.token { + Token::DollarQuotedString(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + { + self.next_token(); + Ok(Expr::Value(Value::DollarQuotedString(s))) + } + _ => Ok(Expr::Value(Value::SingleQuotedString( + self.parse_literal_string()?, + ))), + } + } +} diff --git a/src/parser/discard.rs b/src/parser/discard.rs new file mode 100644 index 000000000..5e81ac8e3 --- /dev/null +++ b/src/parser/discard.rs @@ -0,0 +1,21 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_discard(&mut self) -> Result { + let object_type = if self.parse_keyword(Keyword::ALL) { + DiscardObject::ALL + } else if self.parse_keyword(Keyword::PLANS) { + DiscardObject::PLANS + } else if self.parse_keyword(Keyword::SEQUENCES) { + DiscardObject::SEQUENCES + } else if self.parse_keyword(Keyword::TEMP) || self.parse_keyword(Keyword::TEMPORARY) { + DiscardObject::TEMP + } else { + return self.expected( + "ALL, PLANS, SEQUENCES, TEMP or TEMPORARY after DISCARD", + self.peek_token(), + ); + }; + Ok(Statement::Discard { object_type }) + } +} diff --git a/src/parser/drop.rs b/src/parser/drop.rs new file mode 100644 index 000000000..6f515af7e --- /dev/null +++ b/src/parser/drop.rs @@ -0,0 +1,189 @@ +use super::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + /// Parse statements of the DropTrigger type such as: + /// + /// ```sql + /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] + /// ``` + pub fn parse_drop_trigger(&mut self) -> Result { + if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { + self.prev_token(); + return self.expected("an object type after DROP", self.peek_token()); + } + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let trigger_name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + let option = self + .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) + .map(|keyword| match keyword { + Keyword::CASCADE => ReferentialAction::Cascade, + Keyword::RESTRICT => ReferentialAction::Restrict, + _ => unreachable!(), + }); + Ok(Statement::DropTrigger { + if_exists, + trigger_name, + table_name, + option, + }) + } + + pub fn parse_drop(&mut self) -> Result { + // MySQL dialect supports `TEMPORARY` + let temporary = dialect_of!(self is MySqlDialect | GenericDialect | DuckDbDialect) + && self.parse_keyword(Keyword::TEMPORARY); + let persistent = dialect_of!(self is DuckDbDialect) + && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); + + let object_type = if self.parse_keyword(Keyword::TABLE) { + ObjectType::Table + } else if self.parse_keyword(Keyword::VIEW) { + ObjectType::View + } else if self.parse_keyword(Keyword::INDEX) { + ObjectType::Index + } else if self.parse_keyword(Keyword::ROLE) { + ObjectType::Role + } else if self.parse_keyword(Keyword::SCHEMA) { + ObjectType::Schema + } else if self.parse_keyword(Keyword::DATABASE) { + ObjectType::Database + } else if self.parse_keyword(Keyword::SEQUENCE) { + ObjectType::Sequence + } else if self.parse_keyword(Keyword::STAGE) { + ObjectType::Stage + } else if self.parse_keyword(Keyword::TYPE) { + ObjectType::Type + } else if self.parse_keyword(Keyword::FUNCTION) { + return self.parse_drop_function(); + } else if self.parse_keyword(Keyword::POLICY) { + return self.parse_drop_policy(); + } else if self.parse_keyword(Keyword::PROCEDURE) { + return 
self.parse_drop_procedure(); + } else if self.parse_keyword(Keyword::SECRET) { + return self.parse_drop_secret(temporary, persistent); + } else if self.parse_keyword(Keyword::TRIGGER) { + return self.parse_drop_trigger(); + } else { + return self.expected( + "TABLE, VIEW, INDEX, ROLE, SCHEMA, DATABASE, FUNCTION, PROCEDURE, STAGE, TRIGGER, SECRET, SEQUENCE, or TYPE after DROP", + self.peek_token(), + ); + }; + // Many dialects support the non-standard `IF EXISTS` clause and allow + // specifying multiple objects to delete in a single statement + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; + + let loc = self.peek_token().span.start; + let cascade = self.parse_keyword(Keyword::CASCADE); + let restrict = self.parse_keyword(Keyword::RESTRICT); + let purge = self.parse_keyword(Keyword::PURGE); + if cascade && restrict { + return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc); + } + if object_type == ObjectType::Role && (cascade || restrict || purge) { + return parser_err!( + "Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE", + loc + ); + } + Ok(Statement::Drop { + object_type, + if_exists, + names, + cascade, + restrict, + purge, + temporary, + }) + } + + fn parse_optional_referential_action(&mut self) -> Option { + match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + _ => None, + } + } + + /// ```sql + /// DROP FUNCTION [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] + /// [ CASCADE | RESTRICT ] + /// ``` + fn parse_drop_function(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let func_desc = self.parse_comma_separated(Parser::parse_function_desc)?; + let option = self.parse_optional_referential_action(); + Ok(Statement::DropFunction { + if_exists, + func_desc, + option, + }) + } + + /// ```sql + /// DROP POLICY [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] + /// ``` + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-droppolicy.html) + fn parse_drop_policy(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::ON)?; + let table_name = self.parse_object_name(false)?; + let option = self.parse_optional_referential_action(); + Ok(Statement::DropPolicy { + if_exists, + name, + table_name, + option, + }) + } + + /// ```sql + /// DROP PROCEDURE [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] + /// [ CASCADE | RESTRICT ] + /// ``` + fn parse_drop_procedure(&mut self) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let proc_desc = self.parse_comma_separated(Parser::parse_function_desc)?; + let option = self.parse_optional_referential_action(); + Ok(Statement::DropProcedure { + if_exists, + proc_desc, + option, + }) + } + + /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. 
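+ /// For example (illustrative): `DROP TEMPORARY SECRET IF EXISTS my_secret FROM my_storage;`.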
+ fn parse_drop_secret( + &mut self, + temporary: bool, + persistent: bool, + ) -> Result { + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let name = self.parse_identifier(false)?; + let storage_specifier = if self.parse_keyword(Keyword::FROM) { + self.parse_identifier(false).ok() + } else { + None + }; + let temp = match (temporary, persistent) { + (true, false) => Some(true), + (false, true) => Some(false), + (false, false) => None, + _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, + }; + + Ok(Statement::DropSecret { + if_exists, + temporary: temp, + name, + storage_specifier, + }) + } +} diff --git a/src/parser/end.rs b/src/parser/end.rs new file mode 100644 index 000000000..fd74eecf4 --- /dev/null +++ b/src/parser/end.rs @@ -0,0 +1,9 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_end(&mut self) -> Result { + Ok(Statement::Commit { + chain: self.parse_commit_rollback_chain()?, + }) + } +} diff --git a/src/parser/execute.rs b/src/parser/execute.rs new file mode 100644 index 000000000..b4a0d2c8c --- /dev/null +++ b/src/parser/execute.rs @@ -0,0 +1,38 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_execute(&mut self) -> Result { + let name = self.parse_object_name(false)?; + + let has_parentheses = self.consume_token(&Token::LParen); + + let end_token = match (has_parentheses, self.peek_token().token) { + (true, _) => Token::RParen, + (false, Token::EOF) => Token::EOF, + (false, Token::Word(w)) if w.keyword == Keyword::USING => Token::Word(w), + (false, _) => Token::SemiColon, + }; + + let parameters = self.parse_comma_separated0(Parser::parse_expr, end_token)?; + + if has_parentheses { + self.expect_token(&Token::RParen)?; + } + + let mut using = vec![]; + if self.parse_keyword(Keyword::USING) { + using.push(self.parse_expr()?); + + while self.consume_token(&Token::Comma) { + using.push(self.parse_expr()?); + } + }; + + Ok(Statement::Execute { + name, + parameters, + has_parentheses, + using, + }) + } +} diff --git a/src/parser/explain.rs b/src/parser/explain.rs new file mode 100644 index 000000000..9e24de0e0 --- /dev/null +++ b/src/parser/explain.rs @@ -0,0 +1,92 @@ +use super::*; + +impl<'a> Parser<'a> { + pub fn parse_explain( + &mut self, + describe_alias: DescribeAlias, + ) -> Result { + let mut analyze = false; + let mut verbose = false; + let mut query_plan = false; + let mut format = None; + let mut options = None; + + // Note: DuckDB is compatible with PostgreSQL syntax for this statement, + // although not all features may be implemented. + if describe_alias == DescribeAlias::Explain + && self.dialect.supports_explain_with_utility_options() + && self.peek_token().token == Token::LParen + { + options = Some(self.parse_utility_options()?) + } else if self.parse_keywords(&[Keyword::QUERY, Keyword::PLAN]) { + query_plan = true; + } else { + analyze = self.parse_keyword(Keyword::ANALYZE); + verbose = self.parse_keyword(Keyword::VERBOSE); + if self.parse_keyword(Keyword::FORMAT) { + format = Some(self.parse_analyze_format()?); + } + } + + match self.maybe_parse(|parser| parser.parse_statement())? { + Some(Statement::Explain { .. }) | Some(Statement::ExplainTable { .. 
}) => Err( + ParserError::ParserError("Explain must be root of the plan".to_string()), + ), + Some(statement) => Ok(Statement::Explain { + describe_alias, + analyze, + verbose, + query_plan, + statement: Box::new(statement), + format, + options, + }), + _ => { + let hive_format = + match self.parse_one_of_keywords(&[Keyword::EXTENDED, Keyword::FORMATTED]) { + Some(Keyword::EXTENDED) => Some(HiveDescribeFormat::Extended), + Some(Keyword::FORMATTED) => Some(HiveDescribeFormat::Formatted), + _ => None, + }; + + let has_table_keyword = if self.dialect.describe_requires_table_keyword() { + // only allow to use TABLE keyword for DESC|DESCRIBE statement + self.parse_keyword(Keyword::TABLE) + } else { + false + }; + + let table_name = self.parse_object_name(false)?; + Ok(Statement::ExplainTable { + describe_alias, + hive_format, + has_table_keyword, + table_name, + }) + } + } + } + + pub fn parse_utility_options(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(Self::parse_utility_option)?; + self.expect_token(&Token::RParen)?; + + Ok(options) + } + + fn parse_utility_option(&mut self) -> Result { + let name = self.parse_identifier(false)?; + + let next_token = self.peek_token(); + if next_token == Token::Comma || next_token == Token::RParen { + return Ok(UtilityOption { name, arg: None }); + } + let arg = self.parse_expr()?; + + Ok(UtilityOption { + name, + arg: Some(arg), + }) + } +} diff --git a/src/parser/expr.rs b/src/parser/expr.rs new file mode 100644 index 000000000..b2b2e9260 --- /dev/null +++ b/src/parser/expr.rs @@ -0,0 +1,2115 @@ +use super::*; + +use crate::parser_err; + +// Every Parser method with "parse" and "expr" in the name. + +impl<'a> Parser<'a> { + /// Parse a new expression. + pub fn parse_expr(&mut self) -> Result { + self.parse_subexpr(self.dialect.prec_unknown()) + } + + /// Parse tokens until the precedence changes. + pub fn parse_subexpr(&mut self, precedence: u8) -> Result { + let _guard = self.recursion_counter.try_decrease()?; + debug!("parsing expr"); + let mut expr = self.parse_prefix()?; + debug!("prefix: {:?}", expr); + loop { + let next_precedence = self.get_next_precedence()?; + debug!("next precedence: {:?}", next_precedence); + + if precedence >= next_precedence { + break; + } + + expr = self.parse_infix(expr, next_precedence)?; + } + Ok(expr) + } + + /// Parses an array expression `[ex1, ex2, ..]` + /// if `named` is `true`, came from an expression like `ARRAY[ex1, ex2]` + pub fn parse_array_expr(&mut self, named: bool) -> Result { + let exprs = self.parse_comma_separated0(Parser::parse_expr, Token::RBracket)?; + self.expect_token(&Token::RBracket)?; + Ok(Expr::Array(Array { elem: exprs, named })) + } + + /// Parses `BETWEEN AND `, assuming the `BETWEEN` keyword was already consumed. + pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result { + // Stop parsing subexpressions for and on tokens with + // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc. 
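+ // For example (illustrative), in `a BETWEEN 1 AND 2 AND c`, parsing the low bound + // with `Precedence::Between` stops before `AND`, so the first `AND` is taken as the + // BETWEEN separator and the trailing `AND c` applies to the whole BETWEEN expression.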
+ let low = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?; + self.expect_keyword(Keyword::AND)?; + let high = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?; + Ok(Expr::Between { + expr: Box::new(expr), + negated, + low: Box::new(low), + high: Box::new(high), + }) + } + + pub fn parse_case_expr(&mut self) -> Result { + let mut operand = None; + if !self.parse_keyword(Keyword::WHEN) { + operand = Some(Box::new(self.parse_expr()?)); + self.expect_keyword(Keyword::WHEN)?; + } + let mut conditions = vec![]; + let mut results = vec![]; + loop { + conditions.push(self.parse_expr()?); + self.expect_keyword(Keyword::THEN)?; + results.push(self.parse_expr()?); + if !self.parse_keyword(Keyword::WHEN) { + break; + } + } + let else_result = if self.parse_keyword(Keyword::ELSE) { + Some(Box::new(self.parse_expr()?)) + } else { + None + }; + self.expect_keyword(Keyword::END)?; + Ok(Expr::Case { + operand, + conditions, + results, + else_result, + }) + } + + /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` + pub fn parse_cast_expr(&mut self, kind: CastKind) -> Result { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_keyword(Keyword::AS)?; + let data_type = self.parse_data_type()?; + let format = self.parse_optional_cast_format()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Cast { + kind, + expr: Box::new(expr), + data_type, + format, + }) + } + + pub fn parse_ceil_floor_expr(&mut self, is_ceil: bool) -> Result { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + // Parse `CEIL/FLOOR(expr)` + let field = if self.parse_keyword(Keyword::TO) { + // Parse `CEIL/FLOOR(expr TO DateTimeField)` + CeilFloorKind::DateTimeField(self.parse_date_time_field()?) + } else if self.consume_token(&Token::Comma) { + // Parse `CEIL/FLOOR(expr, scale)` + match self.parse_value()? 
{ + Value::Number(n, s) => CeilFloorKind::Scale(Value::Number(n, s)), + _ => { + return Err(ParserError::ParserError( + "Scale field can only be of number type".to_string(), + )) + } + } + } else { + CeilFloorKind::DateTimeField(DateTimeField::NoDateTime) + }; + self.expect_token(&Token::RParen)?; + if is_ceil { + Ok(Expr::Ceil { + expr: Box::new(expr), + field, + }) + } else { + Ok(Expr::Floor { + expr: Box::new(expr), + field, + }) + } + } + + pub fn parse_character_length(&mut self) -> Result { + if self.parse_keyword(Keyword::MAX) { + return Ok(CharacterLength::Max); + } + let length = self.parse_literal_uint()?; + let unit = if self.parse_keyword(Keyword::CHARACTERS) { + Some(CharLengthUnits::Characters) + } else if self.parse_keyword(Keyword::OCTETS) { + Some(CharLengthUnits::Octets) + } else { + None + }; + Ok(CharacterLength::IntegerLength { length, unit }) + } + + /// Parse a SQL CONVERT function: + /// - `CONVERT('héhé' USING utf8mb4)` (MySQL) + /// - `CONVERT('héhé', CHAR CHARACTER SET utf8mb4)` (MySQL) + /// - `CONVERT(DECIMAL(10, 5), 42)` (MSSQL) - the type comes first + pub fn parse_convert_expr(&mut self, is_try: bool) -> Result { + if self.dialect.convert_type_before_value() { + return self.parse_mssql_convert(is_try); + } + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::USING) { + let charset = self.parse_object_name(false)?; + self.expect_token(&Token::RParen)?; + return Ok(Expr::Convert { + is_try, + expr: Box::new(expr), + data_type: None, + charset: Some(charset), + target_before_value: false, + styles: vec![], + }); + } + self.expect_token(&Token::Comma)?; + let data_type = self.parse_data_type()?; + let charset = if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { + Some(self.parse_object_name(false)?) + } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok(Expr::Convert { + is_try, + expr: Box::new(expr), + data_type: Some(data_type), + charset, + target_before_value: false, + styles: vec![], + }) + } + + // This function parses date/time fields for the EXTRACT function-like + // operator, interval qualifiers, and the ceil/floor operations. + // EXTRACT supports a wider set of date/time fields than interval qualifiers, + // so this function may need to be split in two. 
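+ // For example (illustrative), this one routine serves `EXTRACT(WEEK FROM d)`, + // the qualifier in `INTERVAL '1' DAY`, and `CEIL(ts TO SECOND)`.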
+ pub fn parse_date_time_field(&mut self) -> Result { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::YEAR => Ok(DateTimeField::Year), + Keyword::MONTH => Ok(DateTimeField::Month), + Keyword::WEEK => { + let week_day = if dialect_of!(self is BigQueryDialect | GenericDialect) + && self.consume_token(&Token::LParen) + { + let week_day = self.parse_identifier(false)?; + self.expect_token(&Token::RParen)?; + Some(week_day) + } else { + None + }; + Ok(DateTimeField::Week(week_day)) + } + Keyword::DAY => Ok(DateTimeField::Day), + Keyword::DAYOFWEEK => Ok(DateTimeField::DayOfWeek), + Keyword::DAYOFYEAR => Ok(DateTimeField::DayOfYear), + Keyword::DATE => Ok(DateTimeField::Date), + Keyword::DATETIME => Ok(DateTimeField::Datetime), + Keyword::HOUR => Ok(DateTimeField::Hour), + Keyword::MINUTE => Ok(DateTimeField::Minute), + Keyword::SECOND => Ok(DateTimeField::Second), + Keyword::CENTURY => Ok(DateTimeField::Century), + Keyword::DECADE => Ok(DateTimeField::Decade), + Keyword::DOY => Ok(DateTimeField::Doy), + Keyword::DOW => Ok(DateTimeField::Dow), + Keyword::EPOCH => Ok(DateTimeField::Epoch), + Keyword::ISODOW => Ok(DateTimeField::Isodow), + Keyword::ISOYEAR => Ok(DateTimeField::Isoyear), + Keyword::ISOWEEK => Ok(DateTimeField::IsoWeek), + Keyword::JULIAN => Ok(DateTimeField::Julian), + Keyword::MICROSECOND => Ok(DateTimeField::Microsecond), + Keyword::MICROSECONDS => Ok(DateTimeField::Microseconds), + Keyword::MILLENIUM => Ok(DateTimeField::Millenium), + Keyword::MILLENNIUM => Ok(DateTimeField::Millennium), + Keyword::MILLISECOND => Ok(DateTimeField::Millisecond), + Keyword::MILLISECONDS => Ok(DateTimeField::Milliseconds), + Keyword::NANOSECOND => Ok(DateTimeField::Nanosecond), + Keyword::NANOSECONDS => Ok(DateTimeField::Nanoseconds), + Keyword::QUARTER => Ok(DateTimeField::Quarter), + Keyword::TIME => Ok(DateTimeField::Time), + Keyword::TIMEZONE => Ok(DateTimeField::Timezone), + Keyword::TIMEZONE_ABBR => Ok(DateTimeField::TimezoneAbbr), + Keyword::TIMEZONE_HOUR => Ok(DateTimeField::TimezoneHour), + Keyword::TIMEZONE_MINUTE => Ok(DateTimeField::TimezoneMinute), + Keyword::TIMEZONE_REGION => Ok(DateTimeField::TimezoneRegion), + _ if self.dialect.allow_extract_custom() => { + self.prev_token(); + let custom = self.parse_identifier(false)?; + Ok(DateTimeField::Custom(custom)) + } + _ => self.expected("date/time field", next_token), + }, + Token::SingleQuotedString(_) if self.dialect.allow_extract_single_quotes() => { + self.prev_token(); + let custom = self.parse_identifier(false)?; + Ok(DateTimeField::Custom(custom)) + } + _ => self.expected("date/time field", next_token), + } + } + + /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` + pub fn parse_escape_char(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::ESCAPE) { + Ok(Some(self.parse_literal_string()?)) + } else { + Ok(None) + } + } + + /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`. 
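+ /// The `negated` flag covers the `NOT EXISTS (SELECT ...)` form, where `NOT` was consumed by the caller.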
+ pub fn parse_exists_expr(&mut self, negated: bool) -> Result { + self.expect_token(&Token::LParen)?; + let exists_node = Expr::Exists { + negated, + subquery: self.parse_query()?, + }; + self.expect_token(&Token::RParen)?; + Ok(exists_node) + } + + pub fn parse_extract_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let field = self.parse_date_time_field()?; + + let syntax = if self.parse_keyword(Keyword::FROM) { + ExtractSyntax::From + } else if self.consume_token(&Token::Comma) + && dialect_of!(self is SnowflakeDialect | GenericDialect) + { + ExtractSyntax::Comma + } else { + return Err(ParserError::ParserError( + "Expected 'FROM' or ','".to_string(), + )); + }; + + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Extract { + field, + expr: Box::new(expr), + syntax, + }) + } + + pub fn parse_function(&mut self, name: ObjectName) -> Result { + self.expect_token(&Token::LParen)?; + + // Snowflake permits a subquery to be passed as an argument without + // an enclosing set of parens if it's the only argument. + if dialect_of!(self is SnowflakeDialect) && self.peek_sub_query() { + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + return Ok(Expr::Function(Function { + name, + parameters: FunctionArguments::None, + args: FunctionArguments::Subquery(subquery), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + })); + } + + let mut args = self.parse_function_argument_list()?; + let mut parameters = FunctionArguments::None; + // ClickHouse aggregations support parametric functions like `HISTOGRAM(0.5, 0.6)(x, y)` + // which (0.5, 0.6) is a parameter to the function. + if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.consume_token(&Token::LParen) + { + parameters = FunctionArguments::List(args); + args = self.parse_function_argument_list()?; + } + + let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { + self.expect_token(&Token::LParen)?; + self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?; + let order_by = self.parse_comma_separated(Parser::parse_order_by_expr)?; + self.expect_token(&Token::RParen)?; + order_by + } else { + vec![] + }; + + let filter = if self.dialect.supports_filter_during_aggregation() + && self.parse_keyword(Keyword::FILTER) + && self.consume_token(&Token::LParen) + && self.parse_keyword(Keyword::WHERE) + { + let filter = Some(Box::new(self.parse_expr()?)); + self.expect_token(&Token::RParen)?; + filter + } else { + None + }; + + // Syntax for null treatment shows up either in the args list + // or after the function call, but not both. + let null_treatment = if args + .clauses + .iter() + .all(|clause| !matches!(clause, FunctionArgumentClause::IgnoreOrRespectNulls(_))) + { + self.parse_null_treatment()? + } else { + None + }; + + let over = if self.parse_keyword(Keyword::OVER) { + if self.consume_token(&Token::LParen) { + let window_spec = self.parse_window_spec()?; + Some(WindowType::WindowSpec(window_spec)) + } else { + Some(WindowType::NamedWindow(self.parse_identifier(false)?)) + } + } else { + None + }; + + Ok(Expr::Function(Function { + name, + parameters, + args: FunctionArguments::List(args), + null_treatment, + filter, + over, + within_group, + })) + } + + /// Parses the parens following the `[ NOT ] IN` operator. 
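+ /// For example (illustrative): `x IN (1, 2, 3)`, `x NOT IN (SELECT y FROM t)`, and BigQuery's `x IN UNNEST(arr)` all end up here.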
+ pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result { + // BigQuery allows `IN UNNEST(array_expression)` + // https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#in_operators + if self.parse_keyword(Keyword::UNNEST) { + self.expect_token(&Token::LParen)?; + let array_expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + return Ok(Expr::InUnnest { + expr: Box::new(expr), + array_expr: Box::new(array_expr), + negated, + }); + } + self.expect_token(&Token::LParen)?; + let in_op = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { + self.prev_token(); + Expr::InSubquery { + expr: Box::new(expr), + subquery: self.parse_query()?, + negated, + } + } else { + Expr::InList { + expr: Box::new(expr), + list: if self.dialect.supports_in_empty_list() { + self.parse_comma_separated0(Parser::parse_expr, Token::RParen)? + } else { + self.parse_comma_separated(Parser::parse_expr)? + }, + negated, + } + }; + self.expect_token(&Token::RParen)?; + Ok(in_op) + } + + /// Parse an operator following an expression + pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result { + // allow the dialect to override infix parsing + if let Some(infix) = self.dialect.parse_infix(self, &expr, precedence) { + return infix; + } + + let mut tok = self.next_token(); + let regular_binary_operator = match &mut tok.token { + Token::Spaceship => Some(BinaryOperator::Spaceship), + Token::DoubleEq => Some(BinaryOperator::Eq), + Token::Eq => Some(BinaryOperator::Eq), + Token::Neq => Some(BinaryOperator::NotEq), + Token::Gt => Some(BinaryOperator::Gt), + Token::GtEq => Some(BinaryOperator::GtEq), + Token::Lt => Some(BinaryOperator::Lt), + Token::LtEq => Some(BinaryOperator::LtEq), + Token::Plus => Some(BinaryOperator::Plus), + Token::Minus => Some(BinaryOperator::Minus), + Token::Mul => Some(BinaryOperator::Multiply), + Token::Mod => Some(BinaryOperator::Modulo), + Token::StringConcat => Some(BinaryOperator::StringConcat), + Token::Pipe => Some(BinaryOperator::BitwiseOr), + Token::Caret => { + // In PostgreSQL, ^ stands for the exponentiation operation, + // and # stands for XOR. 
See https://www.postgresql.org/docs/current/functions-math.html + if dialect_of!(self is PostgreSqlDialect) { + Some(BinaryOperator::PGExp) + } else { + Some(BinaryOperator::BitwiseXor) + } + } + Token::Ampersand => Some(BinaryOperator::BitwiseAnd), + Token::Div => Some(BinaryOperator::Divide), + Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => { + Some(BinaryOperator::DuckIntegerDivide) + } + Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { + Some(BinaryOperator::PGBitwiseShiftLeft) + } + Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { + Some(BinaryOperator::PGBitwiseShiftRight) + } + Token::Sharp if dialect_of!(self is PostgreSqlDialect) => { + Some(BinaryOperator::PGBitwiseXor) + } + Token::Overlap if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Some(BinaryOperator::PGOverlap) + } + Token::CaretAt if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Some(BinaryOperator::PGStartsWith) + } + Token::Tilde => Some(BinaryOperator::PGRegexMatch), + Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch), + Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch), + Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch), + Token::DoubleTilde => Some(BinaryOperator::PGLikeMatch), + Token::DoubleTildeAsterisk => Some(BinaryOperator::PGILikeMatch), + Token::ExclamationMarkDoubleTilde => Some(BinaryOperator::PGNotLikeMatch), + Token::ExclamationMarkDoubleTildeAsterisk => Some(BinaryOperator::PGNotILikeMatch), + Token::Arrow => Some(BinaryOperator::Arrow), + Token::LongArrow => Some(BinaryOperator::LongArrow), + Token::HashArrow => Some(BinaryOperator::HashArrow), + Token::HashLongArrow => Some(BinaryOperator::HashLongArrow), + Token::AtArrow => Some(BinaryOperator::AtArrow), + Token::ArrowAt => Some(BinaryOperator::ArrowAt), + Token::HashMinus => Some(BinaryOperator::HashMinus), + Token::AtQuestion => Some(BinaryOperator::AtQuestion), + Token::AtAt => Some(BinaryOperator::AtAt), + Token::Question => Some(BinaryOperator::Question), + Token::QuestionAnd => Some(BinaryOperator::QuestionAnd), + Token::QuestionPipe => Some(BinaryOperator::QuestionPipe), + Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))), + + Token::Word(w) => match w.keyword { + Keyword::AND => Some(BinaryOperator::And), + Keyword::OR => Some(BinaryOperator::Or), + Keyword::XOR => Some(BinaryOperator::Xor), + Keyword::OPERATOR if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + self.expect_token(&Token::LParen)?; + // there are special rules for operator names in + // postgres so we can not use 'parse_object' + // or similar. + // See https://www.postgresql.org/docs/current/sql-createoperator.html + let mut idents = vec![]; + loop { + idents.push(self.next_token().to_string()); + if !self.consume_token(&Token::Period) { + break; + } + } + self.expect_token(&Token::RParen)?; + Some(BinaryOperator::PGCustomBinaryOperator(idents)) + } + _ => None, + }, + _ => None, + }; + + if let Some(op) = regular_binary_operator { + if let Some(keyword) = + self.parse_one_of_keywords(&[Keyword::ANY, Keyword::ALL, Keyword::SOME]) + { + self.expect_token(&Token::LParen)?; + let right = if self.peek_sub_query() { + // We have a subquery ahead (SELECT\WITH ...) need to rewind and + // use the parenthesis for parsing the subquery as an expression. + self.prev_token(); // LParen + self.parse_subexpr(precedence)? 
+ } else { + // Non-subquery expression + let right = self.parse_subexpr(precedence)?; + self.expect_token(&Token::RParen)?; + right + }; + + if !matches!( + op, + BinaryOperator::Gt + | BinaryOperator::Lt + | BinaryOperator::GtEq + | BinaryOperator::LtEq + | BinaryOperator::Eq + | BinaryOperator::NotEq + ) { + return parser_err!( + format!( + "Expected one of [=, >, <, =>, =<, !=] as comparison operator, found: {op}" + ), + tok.span.start + ); + }; + + Ok(match keyword { + Keyword::ALL => Expr::AllOp { + left: Box::new(expr), + compare_op: op, + right: Box::new(right), + }, + Keyword::ANY | Keyword::SOME => Expr::AnyOp { + left: Box::new(expr), + compare_op: op, + right: Box::new(right), + is_some: keyword == Keyword::SOME, + }, + _ => unreachable!(), + }) + } else { + Ok(Expr::BinaryOp { + left: Box::new(expr), + op, + right: Box::new(self.parse_subexpr(precedence)?), + }) + } + } else if let Token::Word(w) = &tok.token { + match w.keyword { + Keyword::IS => { + if self.parse_keyword(Keyword::NULL) { + Ok(Expr::IsNull(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { + Ok(Expr::IsNotNull(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::TRUE]) { + Ok(Expr::IsTrue(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::TRUE]) { + Ok(Expr::IsNotTrue(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::FALSE]) { + Ok(Expr::IsFalse(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::FALSE]) { + Ok(Expr::IsNotFalse(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::UNKNOWN]) { + Ok(Expr::IsUnknown(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::UNKNOWN]) { + Ok(Expr::IsNotUnknown(Box::new(expr))) + } else if self.parse_keywords(&[Keyword::DISTINCT, Keyword::FROM]) { + let expr2 = self.parse_expr()?; + Ok(Expr::IsDistinctFrom(Box::new(expr), Box::new(expr2))) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::DISTINCT, Keyword::FROM]) + { + let expr2 = self.parse_expr()?; + Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2))) + } else { + self.expected( + "[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS", + self.peek_token(), + ) + } + } + Keyword::AT => { + self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; + Ok(Expr::AtTimeZone { + timestamp: Box::new(expr), + time_zone: Box::new(self.parse_subexpr(precedence)?), + }) + } + Keyword::NOT + | Keyword::IN + | Keyword::BETWEEN + | Keyword::LIKE + | Keyword::ILIKE + | Keyword::SIMILAR + | Keyword::REGEXP + | Keyword::RLIKE => { + self.prev_token(); + let negated = self.parse_keyword(Keyword::NOT); + let regexp = self.parse_keyword(Keyword::REGEXP); + let rlike = self.parse_keyword(Keyword::RLIKE); + if regexp || rlike { + Ok(Expr::RLike { + negated, + expr: Box::new(expr), + pattern: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, + ), + regexp, + }) + } else if self.parse_keyword(Keyword::IN) { + self.parse_in(expr, negated) + } else if self.parse_keyword(Keyword::BETWEEN) { + self.parse_between(expr, negated) + } else if self.parse_keyword(Keyword::LIKE) { + Ok(Expr::Like { + negated, + any: self.parse_keyword(Keyword::ANY), + expr: Box::new(expr), + pattern: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, + ), + escape_char: self.parse_escape_char()?, + }) + } else if self.parse_keyword(Keyword::ILIKE) { + Ok(Expr::ILike { + negated, + any: self.parse_keyword(Keyword::ANY), + expr: Box::new(expr), + pattern: Box::new( + 
self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, + ), + escape_char: self.parse_escape_char()?, + }) + } else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) { + Ok(Expr::SimilarTo { + negated, + expr: Box::new(expr), + pattern: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, + ), + escape_char: self.parse_escape_char()?, + }) + } else { + self.expected("IN or BETWEEN after NOT", self.peek_token()) + } + } + // Can only happen if `get_next_precedence` got out of sync with this function + _ => parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.span.start + ), + } + } else if Token::DoubleColon == tok { + Ok(Expr::Cast { + kind: CastKind::DoubleColon, + expr: Box::new(expr), + data_type: self.parse_data_type()?, + format: None, + }) + } else if Token::ExclamationMark == tok && self.dialect.supports_factorial_operator() { + Ok(Expr::UnaryOp { + op: UnaryOperator::PGPostfixFactorial, + expr: Box::new(expr), + }) + } else if Token::LBracket == tok { + if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { + self.parse_subscript(expr) + } else if dialect_of!(self is SnowflakeDialect) || self.dialect.supports_partiql() { + self.prev_token(); + self.parse_json_access(expr) + } else { + self.parse_map_access(expr) + } + } else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == tok { + self.prev_token(); + self.parse_json_access(expr) + } else { + // Can only happen if `get_next_precedence` got out of sync with this function + parser_err!( + format!("No infix parser for token {:?}", tok.token), + tok.span.start + ) + } + } + + /// Parse an `INTERVAL` expression. + /// + /// Some syntactically valid intervals: + /// + /// ```sql + /// 1. INTERVAL '1' DAY + /// 2. INTERVAL '1-1' YEAR TO MONTH + /// 3. INTERVAL '1' SECOND + /// 4. INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5) + /// 5. INTERVAL '1.1' SECOND (2, 2) + /// 6. INTERVAL '1:1' HOUR (5) TO MINUTE (5) + /// 7. (MySql & BigQuery only): INTERVAL 1 DAY + /// ``` + /// + /// Note that we do not currently attempt to parse the quoted value. + pub fn parse_interval(&mut self) -> Result { + // The SQL standard allows an optional sign before the value string, but + // it is not clear if any implementations support that syntax, so we + // don't currently try to parse it. (The sign can instead be included + // inside the value string.) + + // to match the different flavours of INTERVAL syntax, we only allow expressions + // if the dialect requires an interval qualifier, + // see https://github.com/sqlparser-rs/sqlparser-rs/pull/1398 for more details + let value = if self.dialect.require_interval_qualifier() { + // parse a whole expression so `INTERVAL 1 + 1 DAY` is valid + self.parse_expr()? + } else { + // parse a prefix expression so `INTERVAL 1 DAY` is valid, but `INTERVAL 1 + 1 DAY` is not + // this also means that `INTERVAL '5 days' > INTERVAL '1 day'` treated properly + self.parse_prefix()? + }; + + // Following the string literal is a qualifier which indicates the units + // of the duration specified in the string literal. + // + // Note that PostgreSQL allows omitting the qualifier, so we provide + // this more general implementation. + let leading_field = if self.next_token_is_temporal_unit() { + Some(self.parse_date_time_field()?) 
+        } else if self.dialect.require_interval_qualifier() {
+            return parser_err!(
+                "INTERVAL requires a unit after the literal value",
+                self.peek_token().span.start
+            );
+        } else {
+            None
+        };
+
+        let (leading_precision, last_field, fsec_precision) =
+            if leading_field == Some(DateTimeField::Second) {
+                // SQL mandates special syntax for `SECOND TO SECOND` literals.
+                // Instead of
+                // `SECOND [(<leading precision>)] TO SECOND[(<fractional seconds precision>)]`
+                // one must use the special format:
+                // `SECOND [( <leading precision> [ , <fractional seconds precision>] )]`
+                let last_field = None;
+                let (leading_precision, fsec_precision) = self.parse_optional_precision_scale()?;
+                (leading_precision, last_field, fsec_precision)
+            } else {
+                let leading_precision = self.parse_optional_precision()?;
+                if self.parse_keyword(Keyword::TO) {
+                    let last_field = Some(self.parse_date_time_field()?);
+                    let fsec_precision = if last_field == Some(DateTimeField::Second) {
+                        self.parse_optional_precision()?
+                    } else {
+                        None
+                    };
+                    (leading_precision, last_field, fsec_precision)
+                } else {
+                    (leading_precision, None, None)
+                }
+            };
+
+        Ok(Expr::Interval(Interval {
+            value: Box::new(value),
+            leading_field,
+            leading_precision,
+            last_field,
+            fractional_seconds_precision: fsec_precision,
+        }))
+    }
+
+    pub fn parse_listagg_on_overflow(&mut self) -> Result<Option<ListAggOnOverflow>, ParserError> {
+        if self.parse_keywords(&[Keyword::ON, Keyword::OVERFLOW]) {
+            if self.parse_keyword(Keyword::ERROR) {
+                Ok(Some(ListAggOnOverflow::Error))
+            } else {
+                self.expect_keyword(Keyword::TRUNCATE)?;
+                let filler = match self.peek_token().token {
+                    Token::Word(w)
+                        if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT =>
+                    {
+                        None
+                    }
+                    Token::SingleQuotedString(_)
+                    | Token::EscapedStringLiteral(_)
+                    | Token::UnicodeStringLiteral(_)
+                    | Token::NationalStringLiteral(_)
+                    | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
+                    _ => self.expected(
+                        "either filler, WITH, or WITHOUT in LISTAGG",
+                        self.peek_token(),
+                    )?,
+                };
+                let with_count = self.parse_keyword(Keyword::WITH);
+                if !with_count && !self.parse_keyword(Keyword::WITHOUT) {
+                    self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?;
+                }
+                self.expect_keyword(Keyword::COUNT)?;
+                Ok(Some(ListAggOnOverflow::Truncate { filler, with_count }))
+            }
+        } else {
+            Ok(None)
+        }
+    }
+
+    pub fn parse_map_access(&mut self, expr: Expr) -> Result<Expr, ParserError> {
+        let key = self.parse_expr()?;
+        self.expect_token(&Token::RBracket)?;
+
+        let mut keys = vec![MapAccessKey {
+            key,
+            syntax: MapAccessSyntax::Bracket,
+        }];
+        loop {
+            let key = match self.peek_token().token {
+                Token::LBracket => {
+                    self.next_token(); // consume `[`
+                    let key = self.parse_expr()?;
+                    self.expect_token(&Token::RBracket)?;
+                    MapAccessKey {
+                        key,
+                        syntax: MapAccessSyntax::Bracket,
+                    }
+                }
+                // Access on BigQuery nested and repeated expressions can
+                // mix notations in the same expression.
+                // https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns
+                Token::Period if dialect_of!(self is BigQueryDialect) => {
+                    self.next_token(); // consume `.`
+                    MapAccessKey {
+                        key: self.parse_expr()?,
+                        syntax: MapAccessSyntax::Period,
+                    }
+                }
+                _ => break,
+            };
+            keys.push(key);
+        }
+
+        Ok(Expr::MapAccess {
+            column: Box::new(expr),
+            keys,
+        })
+    }
+
+    /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`]
+    ///
+    /// # Errors
+    /// This method will raise an error if the column list is empty or contains invalid identifiers,
+    /// the match expression is not a literal string, or if the search modifier is not valid.
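+    ///
+    /// An illustrative MySQL-style input (an assumed example, not taken from
+    /// this patch): `MATCH (title, body) AGAINST ('search terms' IN NATURAL
+    /// LANGUAGE MODE)`.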
+ pub fn parse_match_against(&mut self) -> Result { + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + + self.expect_keyword(Keyword::AGAINST)?; + + self.expect_token(&Token::LParen)?; + + // MySQL is too permissive about the value, IMO we can't validate it perfectly on syntax level. + let match_value = self.parse_value()?; + + let in_natural_language_mode_keywords = &[ + Keyword::IN, + Keyword::NATURAL, + Keyword::LANGUAGE, + Keyword::MODE, + ]; + + let with_query_expansion_keywords = &[Keyword::WITH, Keyword::QUERY, Keyword::EXPANSION]; + + let in_boolean_mode_keywords = &[Keyword::IN, Keyword::BOOLEAN, Keyword::MODE]; + + let opt_search_modifier = if self.parse_keywords(in_natural_language_mode_keywords) { + if self.parse_keywords(with_query_expansion_keywords) { + Some(SearchModifier::InNaturalLanguageModeWithQueryExpansion) + } else { + Some(SearchModifier::InNaturalLanguageMode) + } + } else if self.parse_keywords(in_boolean_mode_keywords) { + Some(SearchModifier::InBooleanMode) + } else if self.parse_keywords(with_query_expansion_keywords) { + Some(SearchModifier::WithQueryExpansion) + } else { + None + }; + + self.expect_token(&Token::RParen)?; + + Ok(Expr::MatchAgainst { + columns, + match_value, + opt_search_modifier, + }) + } + + pub fn parse_not(&mut self) -> Result { + match self.peek_token().token { + Token::Word(w) => match w.keyword { + Keyword::EXISTS => { + let negated = true; + let _ = self.parse_keyword(Keyword::EXISTS); + self.parse_exists_expr(negated) + } + _ => Ok(Expr::UnaryOp { + op: UnaryOperator::Not, + expr: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?, + ), + }), + }, + _ => Ok(Expr::UnaryOp { + op: UnaryOperator::Not, + expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?), + }), + } + } + + pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::FORMAT) { + let value = self.parse_value()?; + match self.parse_optional_time_zone()? { + Some(tz) => Ok(Some(CastFormat::ValueAtTimeZone(value, tz))), + None => Ok(Some(CastFormat::Value(value))), + } + } else { + Ok(None) + } + } + + pub fn parse_optional_group_by(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { + let expressions = if self.parse_keyword(Keyword::ALL) { + None + } else { + Some(self.parse_comma_separated(Parser::parse_group_by_expr)?) + }; + + let mut modifiers = vec![]; + if dialect_of!(self is ClickHouseDialect | GenericDialect) { + loop { + if !self.parse_keyword(Keyword::WITH) { + break; + } + let keyword = self.expect_one_of_keywords(&[ + Keyword::ROLLUP, + Keyword::CUBE, + Keyword::TOTALS, + ])?; + modifiers.push(match keyword { + Keyword::ROLLUP => GroupByWithModifier::Rollup, + Keyword::CUBE => GroupByWithModifier::Cube, + Keyword::TOTALS => GroupByWithModifier::Totals, + _ => { + return parser_err!( + "BUG: expected to match GroupBy modifier keyword", + self.peek_token().span.start + ) + } + }); + } + } + let group_by = match expressions { + None => GroupByExpr::All(modifiers), + Some(exprs) => GroupByExpr::Expressions(exprs, modifiers), + }; + Ok(Some(group_by)) + } else { + Ok(None) + } + } + + pub fn parse_optional_precision_scale( + &mut self, + ) -> Result<(Option, Option), ParserError> { + if self.consume_token(&Token::LParen) { + let n = self.parse_literal_uint()?; + let scale = if self.consume_token(&Token::Comma) { + Some(self.parse_literal_uint()?) 
+ } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok((Some(n), scale)) + } else { + Ok((None, None)) + } + } + + pub fn parse_optional_time_zone(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { + self.parse_value().map(Some) + } else { + Ok(None) + } + } + + pub fn parse_overlay_expr(&mut self) -> Result { + // PARSE OVERLAY (EXPR PLACING EXPR FROM 1 [FOR 3]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_keyword(Keyword::PLACING)?; + let what_expr = self.parse_expr()?; + self.expect_keyword(Keyword::FROM)?; + let from_expr = self.parse_expr()?; + let mut for_expr = None; + if self.parse_keyword(Keyword::FOR) { + for_expr = Some(self.parse_expr()?); + } + self.expect_token(&Token::RParen)?; + + Ok(Expr::Overlay { + expr: Box::new(expr), + overlay_what: Box::new(what_expr), + overlay_from: Box::new(from_expr), + overlay_for: for_expr.map(Box::new), + }) + } + + pub fn parse_position_expr(&mut self, ident: Ident) -> Result { + let between_prec = self.dialect.prec_value(Precedence::Between); + let position_expr = self.maybe_parse(|p| { + // PARSE SELECT POSITION('@' in field) + p.expect_token(&Token::LParen)?; + + // Parse the subexpr till the IN keyword + let expr = p.parse_subexpr(between_prec)?; + p.expect_keyword(Keyword::IN)?; + let from = p.parse_expr()?; + p.expect_token(&Token::RParen)?; + Ok(Expr::Position { + expr: Box::new(expr), + r#in: Box::new(from), + }) + })?; + match position_expr { + Some(expr) => Ok(expr), + // Snowflake supports `position` as an ordinary function call + // without the special `IN` syntax. + None => self.parse_function(ObjectName(vec![ident])), + } + } + + /// Parse an expression prefix. + pub fn parse_prefix(&mut self) -> Result { + // allow the dialect to override prefix parsing + if let Some(prefix) = self.dialect.parse_prefix(self) { + return prefix; + } + + // PostgreSQL allows any string literal to be preceded by a type name, indicating that the + // string literal represents a literal of that type. Some examples: + // + // DATE '2020-05-20' + // TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54' + // BOOL 'true' + // + // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating + // matters is the fact that INTERVAL string literals may optionally be followed by special + // keywords, e.g.: + // + // INTERVAL '7' DAY + // + // Note also that naively `SELECT date` looks like a syntax error because the `date` type + // name is not followed by a string literal, but in fact in PostgreSQL it is a valid + // expression that should parse as the column name "date". + let loc = self.peek_token().span.start; + let opt_expr = self.maybe_parse(|parser| { + match parser.parse_data_type()? { + DataType::Interval => parser.parse_interval(), + // PostgreSQL allows almost any identifier to be used as custom data type name, + // and we support that in `parse_data_type()`. But unlike Postgres we don't + // have a list of globally reserved keywords (since they vary across dialects), + // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type + // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of + // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the + // `type 'string'` syntax for the custom data types at all. + DataType::Custom(..) 
=> parser_err!("dummy", loc), + data_type => Ok(Expr::TypedString { + data_type, + value: parser.parse_literal_string()?, + }), + } + })?; + + if let Some(expr) = opt_expr { + return Ok(expr); + } + + let next_token = self.next_token(); + let expr = match next_token.token { + Token::Word(w) => { + // The word we consumed may fall into one of two cases: it has a special meaning, or not. + // For example, in Snowflake, the word `interval` may have two meanings depending on the context: + // `SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM tbl;` + // ^^^^^^^^^^^^^^^^ ^^^^^^^^ + // interval expression identifier + // + // We first try to parse the word and following tokens as a special expression, and if that fails, + // we rollback and try to parse it as an identifier. + match self.try_parse(|parser| { + parser.parse_expr_prefix_by_reserved_word(&w, next_token.span) + }) { + // This word indicated an expression prefix and parsing was successful + Ok(Some(expr)) => Ok(expr), + + // No expression prefix associated with this word + Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, next_token.span)?), + + // If parsing of the word as a special expression failed, we are facing two options: + // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`) + // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl` + // We first try to parse the word as an identifier and if that fails + // we rollback and return the parsing error we got from trying to parse a + // special expression (to maintain backwards compatibility of parsing errors). + Err(e) => { + if !self.dialect.is_reserved_for_identifier(w.keyword) { + if let Ok(Some(expr)) = self.maybe_parse(|parser| { + parser.parse_expr_prefix_by_unreserved_word(&w, next_token.span) + }) { + return Ok(expr); + } + } + return Err(e); + } + } + } // End of Token::Word + // array `[1, 2, 3]` + Token::LBracket => self.parse_array_expr(false), + tok @ Token::Minus | tok @ Token::Plus => { + let op = if tok == Token::Plus { + UnaryOperator::Plus + } else { + UnaryOperator::Minus + }; + Ok(Expr::UnaryOp { + op, + expr: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::MulDivModOp))?, + ), + }) + } + Token::ExclamationMark if self.dialect.supports_bang_not_operator() => { + Ok(Expr::UnaryOp { + op: UnaryOperator::BangNot, + expr: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?, + ), + }) + } + tok @ Token::DoubleExclamationMark + | tok @ Token::PGSquareRoot + | tok @ Token::PGCubeRoot + | tok @ Token::AtSign + | tok @ Token::Tilde + if dialect_of!(self is PostgreSqlDialect) => + { + let op = match tok { + Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial, + Token::PGSquareRoot => UnaryOperator::PGSquareRoot, + Token::PGCubeRoot => UnaryOperator::PGCubeRoot, + Token::AtSign => UnaryOperator::PGAbs, + Token::Tilde => UnaryOperator::PGBitwiseNot, + _ => unreachable!(), + }; + Ok(Expr::UnaryOp { + op, + expr: Box::new( + self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?, + ), + }) + } + Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } + Token::UnicodeStringLiteral(_) => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } + Token::Number(_, _) + | Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) + | Token::TripleSingleQuotedString(_) + | Token::TripleDoubleQuotedString(_) + | 
Token::DollarQuotedString(_) + | Token::SingleQuotedByteStringLiteral(_) + | Token::DoubleQuotedByteStringLiteral(_) + | Token::TripleSingleQuotedByteStringLiteral(_) + | Token::TripleDoubleQuotedByteStringLiteral(_) + | Token::SingleQuotedRawStringLiteral(_) + | Token::DoubleQuotedRawStringLiteral(_) + | Token::TripleSingleQuotedRawStringLiteral(_) + | Token::TripleDoubleQuotedRawStringLiteral(_) + | Token::NationalStringLiteral(_) + | Token::HexStringLiteral(_) => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } + Token::LParen => { + let expr = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Some(lambda) = self.try_parse_lambda()? { + return Ok(lambda); + } else { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + match exprs.len() { + 0 => unreachable!(), // parse_comma_separated ensures 1 or more + 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), + _ => Expr::Tuple(exprs), + } + }; + self.expect_token(&Token::RParen)?; + let expr = self.try_parse_method(expr)?; + if !self.consume_token(&Token::Period) { + Ok(expr) + } else { + let tok = self.next_token(); + let key = match tok.token { + Token::Word(word) => word.to_ident(tok.span), + _ => { + return parser_err!( + format!("Expected identifier, found: {tok}"), + tok.span.start + ) + } + }; + Ok(Expr::CompositeAccess { + expr: Box::new(expr), + key, + }) + } + } + Token::Placeholder(_) | Token::Colon | Token::AtSign => { + self.prev_token(); + Ok(Expr::Value(self.parse_value()?)) + } + Token::LBrace if self.dialect.supports_dictionary_syntax() => { + self.prev_token(); + self.parse_duckdb_struct_literal() + } + _ => self.expected("an expression", next_token), + }?; + + let expr = self.try_parse_method(expr)?; + + if self.parse_keyword(Keyword::COLLATE) { + Ok(Expr::Collate { + expr: Box::new(expr), + collation: self.parse_object_name(false)?, + }) + } else { + Ok(expr) + } + } + + /// Parses an array subscript like `[1:3]` + /// + /// Parser is right after `[` + pub fn parse_subscript(&mut self, expr: Expr) -> Result { + let subscript = self.parse_subscript_inner()?; + Ok(Expr::Subscript { + expr: Box::new(expr), + subscript: Box::new(subscript), + }) + } + + pub fn parse_substring_expr(&mut self) -> Result { + // PARSE SUBSTRING (EXPR [FROM 1] [FOR 3]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + let mut from_expr = None; + let special = self.consume_token(&Token::Comma); + if special || self.parse_keyword(Keyword::FROM) { + from_expr = Some(self.parse_expr()?); + } + + let mut to_expr = None; + if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) { + to_expr = Some(self.parse_expr()?); + } + self.expect_token(&Token::RParen)?; + + Ok(Expr::Substring { + expr: Box::new(expr), + substring_from: from_expr.map(Box::new), + substring_for: to_expr.map(Box::new), + special, + }) + } + + pub fn parse_time_functions(&mut self, name: ObjectName) -> Result { + let args = if self.consume_token(&Token::LParen) { + FunctionArguments::List(self.parse_function_argument_list()?) 
+ } else { + FunctionArguments::None + }; + Ok(Expr::Function(Function { + name, + parameters: FunctionArguments::None, + args, + filter: None, + over: None, + null_treatment: None, + within_group: vec![], + })) + } + + /// ```sql + /// TRIM ([WHERE] ['text' FROM] 'text') + /// TRIM ('text') + /// TRIM(, [, characters]) -- only Snowflake or BigQuery + /// ``` + pub fn parse_trim_expr(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let mut trim_where = None; + if let Token::Word(word) = self.peek_token().token { + if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING] + .iter() + .any(|d| word.keyword == *d) + { + trim_where = Some(self.parse_trim_where()?); + } + } + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::FROM) { + let trim_what = Box::new(expr); + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where, + trim_what: Some(trim_what), + trim_characters: None, + }) + } else if self.consume_token(&Token::Comma) + && dialect_of!(self is SnowflakeDialect | BigQueryDialect | GenericDialect) + { + let characters = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where: None, + trim_what: None, + trim_characters: Some(characters), + }) + } else { + self.expect_token(&Token::RParen)?; + Ok(Expr::Trim { + expr: Box::new(expr), + trim_where, + trim_what: None, + trim_characters: None, + }) + } + } + + pub fn parse_trim_where(&mut self) -> Result { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::BOTH => Ok(TrimWhereField::Both), + Keyword::LEADING => Ok(TrimWhereField::Leading), + Keyword::TRAILING => Ok(TrimWhereField::Trailing), + _ => self.expected("trim_where field", next_token)?, + }, + _ => self.expected("trim_where field", next_token), + } + } + + /// Parse a new expression including wildcard & qualified wildcard. + pub fn parse_wildcard_expr(&mut self) -> Result { + let index = self.index; + + let next_token = self.next_token(); + match next_token.token { + t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { + if self.peek_token().token == Token::Period { + let mut id_parts: Vec = vec![match t { + Token::Word(w) => w.to_ident(next_token.span), + Token::SingleQuotedString(s) => Ident::with_quote('\'', s), + _ => unreachable!(), // We matched above + }]; + + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), + Token::SingleQuotedString(s) => { + // SQLite has single-quoted identifiers + id_parts.push(Ident::with_quote('\'', s)) + } + Token::Mul => { + return Ok(Expr::QualifiedWildcard( + ObjectName(id_parts), + AttachedToken(next_token), + )); + } + _ => { + return self + .expected("an identifier or a '*' after '.'", next_token); + } + } + } + } + } + Token::Mul => { + return Ok(Expr::Wildcard(AttachedToken(next_token))); + } + _ => (), + }; + + self.index = index; + self.parse_expr() + } + + /// Parse a Struct type definition as a sequence of field-value pairs. + /// The syntax of the Struct elem differs by dialect so it is customised + /// by the `elem_parser` argument. 
+ /// + /// Syntax + /// ```sql + /// Hive: + /// STRUCT + /// + /// BigQuery: + /// STRUCT<[field_name] field_type> + /// ``` + pub(crate) fn parse_struct_type_def( + &mut self, + mut elem_parser: F, + ) -> Result<(Vec, MatchedTrailingBracket), ParserError> + where + F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>, + { + let start_token = self.peek_token(); + self.expect_keyword(Keyword::STRUCT)?; + + // Nothing to do if we have no type information. + if Token::Lt != self.peek_token() { + return Ok((Default::default(), false.into())); + } + self.next_token(); + + let mut field_defs = vec![]; + let trailing_bracket = loop { + let (def, trailing_bracket) = elem_parser(self)?; + field_defs.push(def); + if !self.consume_token(&Token::Comma) { + break trailing_bracket; + } + + // Angle brackets are balanced so we only expect the trailing `>>` after + // we've matched all field types for the current struct. + // e.g. this is invalid syntax `STRUCT>>, INT>(NULL)` + if trailing_bracket.0 { + return parser_err!("unmatched > in STRUCT definition", start_token.span.start); + } + }; + + Ok(( + field_defs, + self.expect_closing_angle_bracket(trailing_bracket)?, + )) + } + + pub(crate) fn try_parse_expr_sub_query(&mut self) -> Result, ParserError> { + if !self.peek_sub_query() { + return Ok(None); + } + + Ok(Some(Expr::Subquery(self.parse_query()?))) + } + + fn parse_duplicate_treatment(&mut self) -> Result, ParserError> { + let loc = self.peek_token().span.start; + match ( + self.parse_keyword(Keyword::ALL), + self.parse_keyword(Keyword::DISTINCT), + ) { + (true, false) => Ok(Some(DuplicateTreatment::All)), + (false, true) => Ok(Some(DuplicateTreatment::Distinct)), + (false, false) => Ok(None), + (true, true) => parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc), + } + } + + // Tries to parse an expression by matching the specified word to known keywords that have a special meaning in the dialect. + // Returns `None if no match is found. + fn parse_expr_prefix_by_reserved_word( + &mut self, + w: &Word, + w_span: Span, + ) -> Result, ParserError> { + match w.keyword { + Keyword::TRUE | Keyword::FALSE if self.dialect.supports_boolean_literals() => { + self.prev_token(); + Ok(Some(Expr::Value(self.parse_value()?))) + } + Keyword::NULL => { + self.prev_token(); + Ok(Some(Expr::Value(self.parse_value()?))) + } + Keyword::CURRENT_CATALOG + | Keyword::CURRENT_USER + | Keyword::SESSION_USER + | Keyword::USER + if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + { + Ok(Some(Expr::Function(Function { + name: ObjectName(vec![w.to_ident(w_span)]), + parameters: FunctionArguments::None, + args: FunctionArguments::None, + null_treatment: None, + filter: None, + over: None, + within_group: vec![], + }))) + } + Keyword::CURRENT_TIMESTAMP + | Keyword::CURRENT_TIME + | Keyword::CURRENT_DATE + | Keyword::LOCALTIME + | Keyword::LOCALTIMESTAMP => { + Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident(w_span)]))?)) + } + Keyword::CASE => Ok(Some(self.parse_case_expr()?)), + Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)), + Keyword::TRY_CONVERT if self.dialect.supports_try_convert() => Ok(Some(self.parse_convert_expr(true)?)), + Keyword::CAST => Ok(Some(self.parse_cast_expr(CastKind::Cast)?)), + Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)), + Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)), + Keyword::EXISTS + // Support parsing Databricks has a function named `exists`. 
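+            // The guard below restricts this on Databricks: `EXISTS` is only
+            // treated as the predicate form when a subquery follows, i.e.
+            // `EXISTS (SELECT ...)` or `EXISTS (WITH ...)`; a bare `exists(...)`
+            // call falls through to ordinary function parsing.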
+ if !dialect_of!(self is DatabricksDialect) + || matches!( + self.peek_nth_token(1).token, + Token::Word(Word { + keyword: Keyword::SELECT | Keyword::WITH, + .. + }) + ) => + { + Ok(Some(self.parse_exists_expr(false)?)) + } + Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)), + Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)), + Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)), + Keyword::POSITION if self.peek_token().token == Token::LParen => { + Ok(Some(self.parse_position_expr(w.to_ident(w_span))?)) + } + Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)), + Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)), + Keyword::TRIM => Ok(Some(self.parse_trim_expr()?)), + Keyword::INTERVAL => Ok(Some(self.parse_interval()?)), + // Treat ARRAY[1,2,3] as an array [1,2,3], otherwise try as subquery or a function call + Keyword::ARRAY if self.peek_token() == Token::LBracket => { + self.expect_token(&Token::LBracket)?; + Ok(Some(self.parse_array_expr(true)?)) + } + Keyword::ARRAY + if self.peek_token() == Token::LParen + && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) => + { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + Ok(Some(Expr::Function(Function { + name: ObjectName(vec![w.to_ident(w_span)]), + parameters: FunctionArguments::None, + args: FunctionArguments::Subquery(query), + filter: None, + null_treatment: None, + over: None, + within_group: vec![], + }))) + } + Keyword::NOT => Ok(Some(self.parse_not()?)), + Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { + Ok(Some(self.parse_match_against()?)) + } + Keyword::STRUCT if self.dialect.supports_struct_literal() => { + Ok(Some(self.parse_struct_literal()?)) + } + Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => { + let expr = self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?; + Ok(Some(Expr::Prior(Box::new(expr)))) + } + Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => { + Ok(Some(self.parse_duckdb_map_literal()?)) + } + _ => Ok(None) + } + } + + // Tries to parse an expression by a word that is not known to have a special meaning in the dialect. 
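+    //
+    // For example (illustrative inputs): `foo(x)` parses as a function call,
+    // `a.b.c` as a compound identifier, `tbl.*` as a qualified wildcard on
+    // PostgreSQL, and `_utf8'abc'` as an introduced string on MySQL.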
+ fn parse_expr_prefix_by_unreserved_word( + &mut self, + w: &Word, + w_span: Span, + ) -> Result { + match self.peek_token().token { + Token::LParen | Token::Period => { + let mut id_parts: Vec = vec![w.to_ident(w_span)]; + let mut ending_wildcard: Option = None; + while self.consume_token(&Token::Period) { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), + Token::Mul => { + // Postgres explicitly allows funcnm(tablenm.*) and the + // function array_agg traverses this control flow + if dialect_of!(self is PostgreSqlDialect) { + ending_wildcard = Some(next_token); + break; + } else { + return self.expected("an identifier after '.'", next_token); + } + } + Token::SingleQuotedString(s) => id_parts.push(Ident::with_quote('\'', s)), + _ => { + return self.expected("an identifier or a '*' after '.'", next_token); + } + } + } + + if let Some(wildcard_token) = ending_wildcard { + Ok(Expr::QualifiedWildcard( + ObjectName(id_parts), + AttachedToken(wildcard_token), + )) + } else if self.consume_token(&Token::LParen) { + if dialect_of!(self is SnowflakeDialect | MsSqlDialect) + && self.consume_tokens(&[Token::Plus, Token::RParen]) + { + Ok(Expr::OuterJoin(Box::new( + match <[Ident; 1]>::try_from(id_parts) { + Ok([ident]) => Expr::Identifier(ident), + Err(parts) => Expr::CompoundIdentifier(parts), + }, + ))) + } else { + self.prev_token(); + self.parse_function(ObjectName(id_parts)) + } + } else { + Ok(Expr::CompoundIdentifier(id_parts)) + } + } + // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html + Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) + | Token::HexStringLiteral(_) + if w.value.starts_with('_') => + { + Ok(Expr::IntroducedString { + introducer: w.value.clone(), + value: self.parse_introduced_string_value()?, + }) + } + Token::Arrow if self.dialect.supports_lambda_functions() => { + self.expect_token(&Token::Arrow)?; + Ok(Expr::Lambda(LambdaFunction { + params: OneOrManyWithParens::One(w.to_ident(w_span)), + body: Box::new(self.parse_expr()?), + })) + } + _ => Ok(Expr::Identifier(w.to_ident(w_span))), + } + } + + /// Parses a potentially empty list of arguments to a window function + /// (including the closing parenthesis). + /// + /// Examples: + /// ```sql + /// FIRST_VALUE(x ORDER BY 1,2,3); + /// FIRST_VALUE(x IGNORE NULL); + /// ``` + fn parse_function_argument_list(&mut self) -> Result { + let mut clauses = vec![]; + + // For MSSQL empty argument list with json-null-clause case, e.g. `JSON_ARRAY(NULL ON NULL)` + if let Some(null_clause) = self.parse_json_null_clause() { + clauses.push(FunctionArgumentClause::JsonNullClause(null_clause)); + } + + if self.consume_token(&Token::RParen) { + return Ok(FunctionArgumentList { + duplicate_treatment: None, + args: vec![], + clauses, + }); + } + + let duplicate_treatment = self.parse_duplicate_treatment()?; + let args = self.parse_comma_separated(Parser::parse_function_args)?; + + if self.dialect.supports_window_function_null_treatment_arg() { + if let Some(null_treatment) = self.parse_null_treatment()? 
{ + clauses.push(FunctionArgumentClause::IgnoreOrRespectNulls(null_treatment)); + } + } + + if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + clauses.push(FunctionArgumentClause::OrderBy( + self.parse_comma_separated(Parser::parse_order_by_expr)?, + )); + } + + if self.parse_keyword(Keyword::LIMIT) { + clauses.push(FunctionArgumentClause::Limit(self.parse_expr()?)); + } + + if dialect_of!(self is GenericDialect | BigQueryDialect) + && self.parse_keyword(Keyword::HAVING) + { + let kind = match self.expect_one_of_keywords(&[Keyword::MIN, Keyword::MAX])? { + Keyword::MIN => HavingBoundKind::Min, + Keyword::MAX => HavingBoundKind::Max, + _ => unreachable!(), + }; + clauses.push(FunctionArgumentClause::Having(HavingBound( + kind, + self.parse_expr()?, + ))) + } + + if dialect_of!(self is GenericDialect | MySqlDialect) + && self.parse_keyword(Keyword::SEPARATOR) + { + clauses.push(FunctionArgumentClause::Separator(self.parse_value()?)); + } + + if let Some(on_overflow) = self.parse_listagg_on_overflow()? { + clauses.push(FunctionArgumentClause::OnOverflow(on_overflow)); + } + + if let Some(null_clause) = self.parse_json_null_clause() { + clauses.push(FunctionArgumentClause::JsonNullClause(null_clause)); + } + + self.expect_token(&Token::RParen)?; + Ok(FunctionArgumentList { + duplicate_treatment, + args, + clauses, + }) + } + + /// Parse a group by expr. Group by expr can be one of group sets, roll up, cube, or simple expr. + fn parse_group_by_expr(&mut self) -> Result { + if self.dialect.supports_group_by_expr() { + if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { + self.expect_token(&Token::LParen)?; + let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?; + self.expect_token(&Token::RParen)?; + Ok(Expr::GroupingSets(result)) + } else if self.parse_keyword(Keyword::CUBE) { + self.expect_token(&Token::LParen)?; + let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Cube(result)) + } else if self.parse_keyword(Keyword::ROLLUP) { + self.expect_token(&Token::LParen)?; + let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; + self.expect_token(&Token::RParen)?; + Ok(Expr::Rollup(result)) + } else if self.consume_tokens(&[Token::LParen, Token::RParen]) { + // PostgreSQL allow to use empty tuple as a group by expression, + // e.g. `GROUP BY (), name`. 
Please refer to GROUP BY Clause section in + // [PostgreSQL](https://www.postgresql.org/docs/16/sql-select.html) + Ok(Expr::Tuple(vec![])) + } else { + self.parse_expr() + } + } else { + // TODO parse rollup for other dialects + self.parse_expr() + } + } + + fn parse_json_access(&mut self, expr: Expr) -> Result { + let path = self.parse_json_path()?; + Ok(Expr::JsonAccess { + value: Box::new(expr), + path, + }) + } + + /// Parses MSSQL's json-null-clause + fn parse_json_null_clause(&mut self) -> Option { + if self.parse_keywords(&[Keyword::ABSENT, Keyword::ON, Keyword::NULL]) { + Some(JsonNullClause::AbsentOnNull) + } else if self.parse_keywords(&[Keyword::NULL, Keyword::ON, Keyword::NULL]) { + Some(JsonNullClause::NullOnNull) + } else { + None + } + } + + pub(crate) fn parse_json_path(&mut self) -> Result { + let mut path = Vec::new(); + loop { + match self.next_token().token { + Token::Colon if path.is_empty() => { + path.push(self.parse_json_path_object_key()?); + } + Token::Period if !path.is_empty() => { + path.push(self.parse_json_path_object_key()?); + } + Token::LBracket => { + let key = self.parse_expr()?; + self.expect_token(&Token::RBracket)?; + + path.push(JsonPathElem::Bracket { key }); + } + _ => { + self.prev_token(); + break; + } + }; + } + + debug_assert!(!path.is_empty()); + Ok(JsonPath { path }) + } + + fn parse_json_path_object_key(&mut self) -> Result { + let token = self.next_token(); + match token.token { + Token::Word(Word { + value, + // path segments in SF dot notation can be unquoted or double-quoted + quote_style: quote_style @ (Some('"') | None), + // some experimentation suggests that snowflake permits + // any keyword here unquoted. + keyword: _, + }) => Ok(JsonPathElem::Dot { + key: value, + quoted: quote_style.is_some(), + }), + + // This token should never be generated on snowflake or generic + // dialects, but we handle it just in case this is used on future + // dialects. + Token::DoubleQuotedString(key) => Ok(JsonPathElem::Dot { key, quoted: true }), + + _ => self.expected("variant object key name", token), + } + } + + /// Optionally parses a null treatment clause. + fn parse_null_treatment(&mut self) -> Result, ParserError> { + match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE]) { + Some(keyword) => { + self.expect_keyword(Keyword::NULLS)?; + + Ok(match keyword { + Keyword::RESPECT => Some(NullTreatment::RespectNulls), + Keyword::IGNORE => Some(NullTreatment::IgnoreNulls), + _ => None, + }) + } + None => Ok(None), + } + } + + /// Parse an expression value for a struct literal + /// Syntax + /// ```sql + /// expr [AS name] + /// ``` + /// + /// For biquery [1], Parameter typed_syntax is set to true if the expression + /// is to be parsed as a field expression declared using typed + /// struct syntax [2], and false if using typeless struct syntax [3]. 
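+    ///
+    /// For example (illustrative): in `STRUCT<x INT64>(5)` the value `5` is
+    /// parsed with `typed_syntax` set to true (so writing `5 AS x` is an
+    /// error), while `STRUCT(5 AS x)` uses the typeless form.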
+ /// + /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct + /// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax + /// [3]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax + fn parse_struct_field_expr(&mut self, typed_syntax: bool) -> Result { + let expr = self.parse_expr()?; + if self.parse_keyword(Keyword::AS) { + if typed_syntax { + return parser_err!("Typed syntax does not allow AS", { + self.prev_token(); + self.peek_token().span.start + }); + } + let field_name = self.parse_identifier(false)?; + Ok(Expr::Named { + expr: expr.into(), + name: field_name, + }) + } else { + Ok(expr) + } + } + + /// Syntax + /// ```sql + /// -- typed + /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) + /// -- typeless + /// STRUCT( expr1 [AS field_name] [, ... ]) + /// ``` + fn parse_struct_literal(&mut self) -> Result { + // Parse the fields definition if exist `<[field_name] field_type, ...>` + self.prev_token(); + let (fields, trailing_bracket) = + self.parse_struct_type_def(Self::parse_struct_field_def)?; + if trailing_bracket.0 { + return parser_err!( + "unmatched > in STRUCT literal", + self.peek_token().span.start + ); + } + + // Parse the struct values `(expr1 [, ... ])` + self.expect_token(&Token::LParen)?; + let values = self + .parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?; + self.expect_token(&Token::RParen)?; + + Ok(Expr::Struct { values, fields }) + } + + /// Parses an array subscript like + /// * `[:]` + /// * `[l]` + /// * `[l:]` + /// * `[:u]` + /// * `[l:u]` + /// * `[l:u:s]` + /// + /// Parser is right after `[` + fn parse_subscript_inner(&mut self) -> Result { + // at either `:(rest)` or `:(rest)]` + let lower_bound = if self.consume_token(&Token::Colon) { + None + } else { + Some(self.parse_expr()?) + }; + + // check for end + if self.consume_token(&Token::RBracket) { + if let Some(lower_bound) = lower_bound { + return Ok(Subscript::Index { index: lower_bound }); + }; + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); + } + + // consume the `:` + if lower_bound.is_some() { + self.expect_token(&Token::Colon)?; + } + + // we are now at either `]`, `(rest)]` + let upper_bound = if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound: None, + stride: None, + }); + } else { + Some(self.parse_expr()?) + }; + + // check for end + if self.consume_token(&Token::RBracket) { + return Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride: None, + }); + } + + // we are now at `:]` or `:stride]` + self.expect_token(&Token::Colon)?; + let stride = if self.consume_token(&Token::RBracket) { + None + } else { + Some(self.parse_expr()?) + }; + + if stride.is_some() { + self.expect_token(&Token::RBracket)?; + } + + Ok(Subscript::Slice { + lower_bound, + upper_bound, + stride, + }) + } + + /// Parse a tuple with `(` and `)`. + /// If `lift_singleton` is true, then a singleton tuple is lifted to a tuple of length 1, otherwise it will fail. + /// If `allow_empty` is true, then an empty tuple is allowed. 
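+    ///
+    /// For example (illustrative): with `lift_singleton` set, the input `a`
+    /// yields the one-element tuple `[a]` and `(a, b)` yields `[a, b]`;
+    /// without it, the leading `(` is required.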
+ fn parse_tuple( + &mut self, + lift_singleton: bool, + allow_empty: bool, + ) -> Result, ParserError> { + if lift_singleton { + if self.consume_token(&Token::LParen) { + let result = if allow_empty && self.consume_token(&Token::RParen) { + vec![] + } else { + let result = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + result + }; + Ok(result) + } else { + Ok(vec![self.parse_expr()?]) + } + } else { + self.expect_token(&Token::LParen)?; + let result = if allow_empty && self.consume_token(&Token::RParen) { + vec![] + } else { + let result = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + result + }; + Ok(result) + } + } + + fn try_parse_lambda(&mut self) -> Result, ParserError> { + if !self.dialect.supports_lambda_functions() { + return Ok(None); + } + self.maybe_parse(|p| { + let params = p.parse_comma_separated(|p| p.parse_identifier(false))?; + p.expect_token(&Token::RParen)?; + p.expect_token(&Token::Arrow)?; + let expr = p.parse_expr()?; + Ok(Expr::Lambda(LambdaFunction { + params: OneOrManyWithParens::Many(params), + body: Box::new(expr), + })) + }) + } + + /// Parses method call expression + fn try_parse_method(&mut self, expr: Expr) -> Result { + if !self.dialect.supports_methods() { + return Ok(expr); + } + let method_chain = self.maybe_parse(|p| { + let mut method_chain = Vec::new(); + while p.consume_token(&Token::Period) { + let tok = p.next_token(); + let name = match tok.token { + Token::Word(word) => word.to_ident(tok.span), + _ => return p.expected("identifier", tok), + }; + let func = match p.parse_function(ObjectName(vec![name]))? { + Expr::Function(func) => func, + _ => return p.expected("function", p.peek_token()), + }; + method_chain.push(func); + } + if !method_chain.is_empty() { + Ok(method_chain) + } else { + p.expected("function", p.peek_token()) + } + })?; + if let Some(method_chain) = method_chain { + Ok(Expr::Method(Method { + expr: Box::new(expr), + method_chain, + })) + } else { + Ok(expr) + } + } +} diff --git a/src/parser/fetch.rs b/src/parser/fetch.rs new file mode 100644 index 000000000..d89b9445f --- /dev/null +++ b/src/parser/fetch.rs @@ -0,0 +1,64 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + // FETCH [ direction { FROM | IN } ] cursor INTO target; + pub fn parse_fetch_statement(&mut self) -> Result { + let direction = if self.parse_keyword(Keyword::NEXT) { + FetchDirection::Next + } else if self.parse_keyword(Keyword::PRIOR) { + FetchDirection::Prior + } else if self.parse_keyword(Keyword::FIRST) { + FetchDirection::First + } else if self.parse_keyword(Keyword::LAST) { + FetchDirection::Last + } else if self.parse_keyword(Keyword::ABSOLUTE) { + FetchDirection::Absolute { + limit: self.parse_number_value()?, + } + } else if self.parse_keyword(Keyword::RELATIVE) { + FetchDirection::Relative { + limit: self.parse_number_value()?, + } + } else if self.parse_keyword(Keyword::FORWARD) { + if self.parse_keyword(Keyword::ALL) { + FetchDirection::ForwardAll + } else { + FetchDirection::Forward { + // TODO: Support optional + limit: Some(self.parse_number_value()?), + } + } + } else if self.parse_keyword(Keyword::BACKWARD) { + if self.parse_keyword(Keyword::ALL) { + FetchDirection::BackwardAll + } else { + FetchDirection::Backward { + // TODO: Support optional + limit: Some(self.parse_number_value()?), + } + } + } else if self.parse_keyword(Keyword::ALL) { + FetchDirection::All + } else { + FetchDirection::Count { + limit: self.parse_number_value()?, + } + }; + + 
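+        // The direction (if any) has been consumed; for an illustrative input
+        // like `FETCH FORWARD 5 FROM cur INTO tgt`, the parser is now
+        // positioned at `FROM`. FROM or IN, the cursor name, and an optional
+        // INTO target follow.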
self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; + + let name = self.parse_identifier(false)?; + + let into = if self.parse_keyword(Keyword::INTO) { + Some(self.parse_object_name(false)?) + } else { + None + }; + + Ok(Statement::Fetch { + name, + direction, + into, + }) + } +} diff --git a/src/parser/flush.rs b/src/parser/flush.rs new file mode 100644 index 000000000..1818c4073 --- /dev/null +++ b/src/parser/flush.rs @@ -0,0 +1,92 @@ +use crate::parser::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + pub fn parse_flush(&mut self) -> Result { + let mut channel = None; + let mut tables: Vec = vec![]; + let mut read_lock = false; + let mut export = false; + + if !dialect_of!(self is MySqlDialect | GenericDialect) { + return parser_err!("Unsupported statement FLUSH", self.peek_token().span.start); + } + + let location = if self.parse_keyword(Keyword::NO_WRITE_TO_BINLOG) { + Some(FlushLocation::NoWriteToBinlog) + } else if self.parse_keyword(Keyword::LOCAL) { + Some(FlushLocation::Local) + } else { + None + }; + + let object_type = if self.parse_keywords(&[Keyword::BINARY, Keyword::LOGS]) { + FlushType::BinaryLogs + } else if self.parse_keywords(&[Keyword::ENGINE, Keyword::LOGS]) { + FlushType::EngineLogs + } else if self.parse_keywords(&[Keyword::ERROR, Keyword::LOGS]) { + FlushType::ErrorLogs + } else if self.parse_keywords(&[Keyword::GENERAL, Keyword::LOGS]) { + FlushType::GeneralLogs + } else if self.parse_keywords(&[Keyword::HOSTS]) { + FlushType::Hosts + } else if self.parse_keyword(Keyword::PRIVILEGES) { + FlushType::Privileges + } else if self.parse_keyword(Keyword::OPTIMIZER_COSTS) { + FlushType::OptimizerCosts + } else if self.parse_keywords(&[Keyword::RELAY, Keyword::LOGS]) { + if self.parse_keywords(&[Keyword::FOR, Keyword::CHANNEL]) { + channel = Some(self.parse_object_name(false).unwrap().to_string()); + } + FlushType::RelayLogs + } else if self.parse_keywords(&[Keyword::SLOW, Keyword::LOGS]) { + FlushType::SlowLogs + } else if self.parse_keyword(Keyword::STATUS) { + FlushType::Status + } else if self.parse_keyword(Keyword::USER_RESOURCES) { + FlushType::UserResources + } else if self.parse_keywords(&[Keyword::LOGS]) { + FlushType::Logs + } else if self.parse_keywords(&[Keyword::TABLES]) { + loop { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::WITH => { + read_lock = self.parse_keywords(&[Keyword::READ, Keyword::LOCK]); + } + Keyword::FOR => { + export = self.parse_keyword(Keyword::EXPORT); + } + Keyword::NoKeyword => { + self.prev_token(); + tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; + } + _ => {} + }, + _ => { + break; + } + } + } + + FlushType::Tables + } else { + return self.expected( + "BINARY LOGS, ENGINE LOGS, ERROR LOGS, GENERAL LOGS, HOSTS, LOGS, PRIVILEGES, OPTIMIZER_COSTS,\ + RELAY LOGS [FOR CHANNEL channel], SLOW LOGS, STATUS, USER_RESOURCES", + self.peek_token(), + ); + }; + + Ok(Statement::Flush { + object_type, + location, + channel, + read_lock, + export, + tables, + }) + } +} diff --git a/src/parser/grant.rs b/src/parser/grant.rs new file mode 100644 index 000000000..eed67346f --- /dev/null +++ b/src/parser/grant.rs @@ -0,0 +1,157 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse a GRANT statement. 
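+    ///
+    /// A minimal usage sketch through the public entry point, assuming the
+    /// generic dialect (an illustrative example, not a doctest from this patch):
+    ///
+    /// ```ignore
+    /// use sqlparser::dialect::GenericDialect;
+    /// use sqlparser::parser::Parser;
+    ///
+    /// let sql = "GRANT SELECT, INSERT ON customers TO analyst WITH GRANT OPTION";
+    /// let statements = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
+    /// assert_eq!(statements.len(), 1);
+    /// ```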
+ pub fn parse_grant(&mut self) -> Result { + let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; + + self.expect_keyword(Keyword::TO)?; + let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?; + + let with_grant_option = + self.parse_keywords(&[Keyword::WITH, Keyword::GRANT, Keyword::OPTION]); + + let granted_by = self + .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) + .then(|| self.parse_identifier(false).unwrap()); + + Ok(Statement::Grant { + privileges, + objects, + grantees, + with_grant_option, + granted_by, + }) + } + + pub fn parse_actions_list(&mut self) -> Result, ParserError> { + let mut values = vec![]; + loop { + values.push(self.parse_grant_permission()?); + if !self.consume_token(&Token::Comma) { + break; + } else if self.options.trailing_commas { + match self.peek_token().token { + Token::Word(kw) if kw.keyword == Keyword::ON => { + break; + } + Token::RParen + | Token::SemiColon + | Token::EOF + | Token::RBracket + | Token::RBrace => break, + _ => continue, + } + } + } + Ok(values) + } + + pub fn parse_grant_permission(&mut self) -> Result { + if let Some(kw) = self.parse_one_of_keywords(&[ + Keyword::CONNECT, + Keyword::CREATE, + Keyword::DELETE, + Keyword::EXECUTE, + Keyword::INSERT, + Keyword::REFERENCES, + Keyword::SELECT, + Keyword::TEMPORARY, + Keyword::TRIGGER, + Keyword::TRUNCATE, + Keyword::UPDATE, + Keyword::USAGE, + ]) { + let columns = match kw { + Keyword::INSERT | Keyword::REFERENCES | Keyword::SELECT | Keyword::UPDATE => { + let columns = self.parse_parenthesized_column_list(Optional, false)?; + if columns.is_empty() { + None + } else { + Some(columns) + } + } + _ => None, + }; + Ok((kw, columns)) + } else { + self.expected("a privilege keyword", self.peek_token())? + } + } + + pub fn parse_grant_revoke_privileges_objects( + &mut self, + ) -> Result<(Privileges, GrantObjects), ParserError> { + let privileges = if self.parse_keyword(Keyword::ALL) { + Privileges::All { + with_privileges_keyword: self.parse_keyword(Keyword::PRIVILEGES), + } + } else { + let (actions, err): (Vec<_>, Vec<_>) = self + .parse_actions_list()? 
+ .into_iter() + .map(|(kw, columns)| match kw { + Keyword::DELETE => Ok(Action::Delete), + Keyword::INSERT => Ok(Action::Insert { columns }), + Keyword::REFERENCES => Ok(Action::References { columns }), + Keyword::SELECT => Ok(Action::Select { columns }), + Keyword::TRIGGER => Ok(Action::Trigger), + Keyword::TRUNCATE => Ok(Action::Truncate), + Keyword::UPDATE => Ok(Action::Update { columns }), + Keyword::USAGE => Ok(Action::Usage), + Keyword::CONNECT => Ok(Action::Connect), + Keyword::CREATE => Ok(Action::Create), + Keyword::EXECUTE => Ok(Action::Execute), + Keyword::TEMPORARY => Ok(Action::Temporary), + // This will cover all future added keywords to + // parse_grant_permission and unhandled in this + // match + _ => Err(kw), + }) + .partition(Result::is_ok); + + if !err.is_empty() { + let errors: Vec = err.into_iter().filter_map(|x| x.err()).collect(); + return Err(ParserError::ParserError(format!( + "INTERNAL ERROR: GRANT/REVOKE unexpected keyword(s) - {errors:?}" + ))); + } + let act = actions.into_iter().filter_map(|x| x.ok()).collect(); + Privileges::Actions(act) + }; + + self.expect_keyword(Keyword::ON)?; + + let objects = if self.parse_keywords(&[ + Keyword::ALL, + Keyword::TABLES, + Keyword::IN, + Keyword::SCHEMA, + ]) { + GrantObjects::AllTablesInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + } + } else if self.parse_keywords(&[ + Keyword::ALL, + Keyword::SEQUENCES, + Keyword::IN, + Keyword::SCHEMA, + ]) { + GrantObjects::AllSequencesInSchema { + schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, + } + } else { + let object_type = + self.parse_one_of_keywords(&[Keyword::SEQUENCE, Keyword::SCHEMA, Keyword::TABLE]); + let objects = self.parse_comma_separated(|p| p.parse_object_name(false)); + match object_type { + Some(Keyword::SCHEMA) => GrantObjects::Schemas(objects?), + Some(Keyword::SEQUENCE) => GrantObjects::Sequences(objects?), + Some(Keyword::TABLE) | None => GrantObjects::Tables(objects?), + _ => unreachable!(), + } + }; + + Ok((privileges, objects)) + } +} diff --git a/src/parser/identifier.rs b/src/parser/identifier.rs new file mode 100644 index 000000000..98a2dab10 --- /dev/null +++ b/src/parser/identifier.rs @@ -0,0 +1,278 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) + /// + /// The `in_table_clause` parameter indicates whether the identifier is a table in a FROM, JOIN, or + /// similar table clause. Currently, this is used only to support unquoted hyphenated identifiers in + // this context on BigQuery. + pub fn parse_identifier(&mut self, in_table_clause: bool) -> Result { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => { + let mut ident = w.to_ident(next_token.span); + + // On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or + // TABLE clause [0]. + // + // The first segment must be an ordinary unquoted identifier, e.g. it must not start + // with a digit. Subsequent segments are either must either be valid identifiers or + // integers, e.g. foo-123 is allowed, but foo-123a is not. 
+ // + // [0] https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical + if dialect_of!(self is BigQueryDialect) + && w.quote_style.is_none() + && in_table_clause + { + let mut requires_whitespace = false; + while matches!(self.peek_token_no_skip().token, Token::Minus) { + self.next_token(); + ident.value.push('-'); + + let token = self + .next_token_no_skip() + .cloned() + .unwrap_or(TokenWithSpan::wrap(Token::EOF)); + requires_whitespace = match token.token { + Token::Word(next_word) if next_word.quote_style.is_none() => { + ident.value.push_str(&next_word.value); + false + } + Token::Number(s, false) if s.chars().all(|c| c.is_ascii_digit()) => { + ident.value.push_str(&s); + true + } + _ => { + return self + .expected("continuation of hyphenated identifier", token); + } + } + } + + // If the last segment was a number, we must check that it's followed by whitespace, + // otherwise foo-123a will be parsed as `foo-123` with the alias `a`. + if requires_whitespace { + let token = self.next_token(); + if !matches!(token.token, Token::EOF | Token::Whitespace(_)) { + return self + .expected("whitespace following hyphenated identifier", token); + } + } + } + Ok(ident) + } + Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)), + Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)), + _ => self.expected("identifier", next_token), + } + } + + /// Parse identifiers + pub fn parse_identifiers(&mut self) -> Result, ParserError> { + let mut idents = vec![]; + loop { + match self.peek_token().token { + Token::Word(w) => { + idents.push(w.to_ident(self.peek_token().span)); + } + Token::EOF | Token::Eq => break, + _ => {} + } + self.next_token(); + } + Ok(idents) + } + + /// Parse a possibly qualified, possibly quoted identifier, e.g. + /// `foo` or `myschema."table" + /// + /// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN, + /// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers + /// in this context on BigQuery. + pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result { + let mut idents = vec![]; + loop { + if self.dialect.supports_object_name_double_dot_notation() + && idents.len() == 1 + && self.consume_token(&Token::Period) + { + // Empty string here means default schema + idents.push(Ident::new("")); + } + idents.push(self.parse_identifier(in_table_clause)?); + if !self.consume_token(&Token::Period) { + break; + } + } + + // BigQuery accepts any number of quoted identifiers of a table name. 
+
+    /// Parse identifiers
+    pub fn parse_identifiers(&mut self) -> Result<Vec<Ident>, ParserError> {
+        let mut idents = vec![];
+        loop {
+            match self.peek_token().token {
+                Token::Word(w) => {
+                    idents.push(w.to_ident(self.peek_token().span));
+                }
+                Token::EOF | Token::Eq => break,
+                _ => {}
+            }
+            self.next_token();
+        }
+        Ok(idents)
+    }
+
+    /// Parse a possibly qualified, possibly quoted identifier, e.g.
+    /// `foo` or `myschema."table"`
+    ///
+    /// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
+    /// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
+    /// in this context on BigQuery.
+    pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result<ObjectName, ParserError> {
+        let mut idents = vec![];
+        loop {
+            if self.dialect.supports_object_name_double_dot_notation()
+                && idents.len() == 1
+                && self.consume_token(&Token::Period)
+            {
+                // Empty string here means default schema
+                idents.push(Ident::new(""));
+            }
+            idents.push(self.parse_identifier(in_table_clause)?);
+            if !self.consume_token(&Token::Period) {
+                break;
+            }
+        }
+
+        // BigQuery accepts any number of quoted identifiers of a table name.
+        // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers
+        if dialect_of!(self is BigQueryDialect)
+            && idents.iter().any(|ident| ident.value.contains('.'))
+        {
+            idents = idents
+                .into_iter()
+                .flat_map(|ident| {
+                    ident
+                        .value
+                        .split('.')
+                        .map(|value| Ident {
+                            value: value.into(),
+                            quote_style: ident.quote_style,
+                            span: ident.span,
+                        })
+                        .collect::<Vec<_>>()
+                })
+                .collect()
+        }
+
+        Ok(ObjectName(idents))
+    }
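The quoted-identifier splitting is similarly observable from the public API. A sketch, not part of the patch (made-up names again):

```rust
use sqlparser::dialect::BigQueryDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = BigQueryDialect {};
    // One back-quoted identifier containing dots is re-split into one
    // Ident per path segment, each keeping the original quote style.
    let sql = "SELECT * FROM `my-project.mydataset.mytable`";
    let statements = Parser::new(&dialect)
        .try_with_sql(sql)
        .unwrap()
        .parse_statements()
        .unwrap();
    println!("{}", statements[0]);
}
```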
+
+    /// Strictly parse `identifier AS identifier`
+    pub fn parse_identifier_with_alias(&mut self) -> Result<IdentWithAlias, ParserError> {
+        let ident = self.parse_identifier(false)?;
+        self.expect_keyword(Keyword::AS)?;
+        let alias = self.parse_identifier(false)?;
+        Ok(IdentWithAlias { ident, alias })
+    }
+
+    /// Parse identifiers of form ident1[.identN]*
+    ///
+    /// Similar in functionality to [parse_identifiers], with the difference
+    /// being that this function is much more strict about parsing a valid multipart identifier, not
+    /// allowing extraneous tokens to be parsed, otherwise it fails.
+    ///
+    /// For example:
+    ///
+    /// ```rust
+    /// use sqlparser::ast::Ident;
+    /// use sqlparser::dialect::GenericDialect;
+    /// use sqlparser::parser::Parser;
+    ///
+    /// let dialect = GenericDialect {};
+    /// let expected = vec![Ident::new("one"), Ident::new("two")];
+    ///
+    /// // expected usage
+    /// let sql = "one.two";
+    /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
+    /// let actual = parser.parse_multipart_identifier().unwrap();
+    /// assert_eq!(&actual, &expected);
+    ///
+    /// // parse_identifiers is more loose on what it allows, parsing successfully
+    /// let sql = "one + two";
+    /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
+    /// let actual = parser.parse_identifiers().unwrap();
+    /// assert_eq!(&actual, &expected);
+    ///
+    /// // expected to strictly fail due to + separator
+    /// let sql = "one + two";
+    /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
+    /// let actual = parser.parse_multipart_identifier().unwrap_err();
+    /// assert_eq!(
+    ///     actual.to_string(),
+    ///     "sql parser error: Unexpected token in identifier: +"
+    /// );
+    /// ```
+    ///
+    /// [parse_identifiers]: Parser::parse_identifiers
+    pub fn parse_multipart_identifier(&mut self) -> Result<Vec<Ident>, ParserError> {
+        let mut idents = vec![];
+
+        // expecting at least one word for identifier
+        let next_token = self.next_token();
+        match next_token.token {
+            Token::Word(w) => idents.push(w.to_ident(next_token.span)),
+            Token::EOF => {
+                return Err(ParserError::ParserError(
+                    "Empty input when parsing identifier".to_string(),
+                ))?
+            }
+            token => {
+                return Err(ParserError::ParserError(format!(
+                    "Unexpected token in identifier: {token}"
+                )))?
+            }
+        };
+
+        // parse optional next parts if exist
+        loop {
+            match self.next_token().token {
+                // ensure that optional period is succeeded by another identifier
+                Token::Period => {
+                    let next_token = self.next_token();
+                    match next_token.token {
+                        Token::Word(w) => idents.push(w.to_ident(next_token.span)),
+                        Token::EOF => {
+                            return Err(ParserError::ParserError(
+                                "Trailing period in identifier".to_string(),
+                            ))?
+                        }
+                        token => {
+                            return Err(ParserError::ParserError(format!(
+                                "Unexpected token following period in identifier: {token}"
+                            )))?
+                        }
+                    }
+                }
+                Token::EOF => break,
+                token => {
+                    return Err(ParserError::ParserError(format!(
+                        "Unexpected token in identifier: {token}"
+                    )))?
+                }
+            }
+        }
+
+        Ok(idents)
+    }
+
+    /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword)
+    /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`,
+    /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar`
+    pub fn parse_optional_alias(
+        &mut self,
+        reserved_kwds: &[Keyword],
+    ) -> Result<Option<Ident>, ParserError> {
+        let after_as = self.parse_keyword(Keyword::AS);
+        let next_token = self.next_token();
+        match next_token.token {
+            // Accept any identifier after `AS` (though many dialects have restrictions on
+            // keywords that may appear here). If there's no `AS`, don't allow keywords that
+            // may start a construct allowed in this position to be parsed as aliases.
+            // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
+            // not an alias.)
+            Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
+                Ok(Some(w.to_ident(next_token.span)))
+            }
+            // MSSQL supports single-quoted strings as aliases for columns
+            // We accept them as table aliases too, although MSSQL does not.
+            //
+            // Note that this conflicts with an obscure rule from the SQL
+            // standard, which we don't implement:
+            // https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s
+            // "[Obscure Rule] SQL allows you to break a long <character
+            // string literal> up into two or more smaller <character string
+            // literal>s, split by a <separator> that includes a newline
+            // character. When it sees such a <literal>, your DBMS will
+            // ignore the <separator> and treat the multiple strings as
+            // a single <literal>."
+            Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))),
+            // Support for MySql dialect double-quoted string, `AS "HOUR"` for example
+            Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))),
+            _ => {
+                if after_as {
+                    return self.expected("an identifier after AS", next_token);
+                }
+                self.prev_token();
+                Ok(None) // no alias found
+            }
+        }
+    }
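For parse_optional_alias, the AS keyword only forces the next word to be taken as an identifier; both spellings below produce the same alias. A minimal sketch, not part of the patch:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    for sql in ["SELECT 1 AS x", "SELECT 1 x"] {
        let stmt = Parser::new(&dialect)
            .try_with_sql(sql)
            .unwrap()
            .parse_statement()
            .unwrap();
        // Both print `SELECT 1 AS x`, since the alias is recorded either way.
        println!("{stmt}");
    }
}
```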
+
+    /// Parse an optional `ident`, where `ident` is usually a name such as
+    /// `window_name`, `index_name`, ...
+    pub fn parse_optional_indent(&mut self) -> Result<Option<Ident>, ParserError> {
+        self.maybe_parse(|parser| parser.parse_identifier(false))
+    }
+}
diff --git a/src/parser/insert.rs b/src/parser/insert.rs
new file mode 100644
index 000000000..782397e0e
--- /dev/null
+++ b/src/parser/insert.rs
@@ -0,0 +1,200 @@
+use super::*;
+
+impl<'a> Parser<'a> {
+    /// Parse an INSERT statement, returning a `Box`ed SetExpr
+    ///
+    /// This is used to reduce the size of the stack frames in debug builds
+    pub(crate) fn parse_insert_setexpr_boxed(&mut self) -> Result<Box<SetExpr>, ParserError> {
+        Ok(Box::new(SetExpr::Insert(self.parse_insert()?)))
+    }
+
+    /// Parse an INSERT statement
+    pub fn parse_insert(&mut self) -> Result<Statement, ParserError> {
+        let or = self.parse_conflict_clause();
+        let priority = if !dialect_of!(self is MySqlDialect | GenericDialect) {
+            None
+        } else if self.parse_keyword(Keyword::LOW_PRIORITY) {
+            Some(MysqlInsertPriority::LowPriority)
+        } else if self.parse_keyword(Keyword::DELAYED) {
+            Some(MysqlInsertPriority::Delayed)
+        } else if self.parse_keyword(Keyword::HIGH_PRIORITY) {
+            Some(MysqlInsertPriority::HighPriority)
+        } else {
+            None
+        };
+
+        let ignore = dialect_of!(self is MySqlDialect | GenericDialect)
+            && self.parse_keyword(Keyword::IGNORE);
+
+        let replace_into = false;
+
+        let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]);
+        let into = action == Some(Keyword::INTO);
+        let overwrite = action == Some(Keyword::OVERWRITE);
+
+        let local = self.parse_keyword(Keyword::LOCAL);
+
+        if self.parse_keyword(Keyword::DIRECTORY) {
+            let path = self.parse_literal_string()?;
+            let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) {
+                Some(self.parse_file_format()?)
+            } else {
+                None
+            };
+            let source = self.parse_query()?;
+            Ok(Statement::Directory {
+                local,
+                path,
+                overwrite,
+                file_format,
+                source,
+            })
+        } else {
+            // Hive lets you put table here regardless
+            let table = self.parse_keyword(Keyword::TABLE);
+            let table_name = self.parse_object_name(false)?;
+
+            let table_alias =
+                if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::AS) {
+                    Some(self.parse_identifier(false)?)
+                } else {
+                    None
+                };
+
+            let is_mysql = dialect_of!(self is MySqlDialect);
+
+            let (columns, partitioned, after_columns, source) =
+                if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) {
+                    (vec![], None, vec![], None)
+                } else {
+                    let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
+
+                    let partitioned = self.parse_insert_partition()?;
+                    // Hive allows you to specify columns after partitions as well if you want.
+                    let after_columns = if dialect_of!(self is HiveDialect) {
+                        self.parse_parenthesized_column_list(Optional, false)?
+ } else { + vec![] + }; + + let source = Some(self.parse_query()?); + + (columns, partitioned, after_columns, source) + }; + + let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::AS) + { + let row_alias = self.parse_object_name(false)?; + let col_aliases = Some(self.parse_parenthesized_column_list(Optional, false)?); + Some(InsertAliases { + row_alias, + col_aliases, + }) + } else { + None + }; + + let on = if self.parse_keyword(Keyword::ON) { + if self.parse_keyword(Keyword::CONFLICT) { + let conflict_target = + if self.parse_keywords(&[Keyword::ON, Keyword::CONSTRAINT]) { + Some(ConflictTarget::OnConstraint(self.parse_object_name(false)?)) + } else if self.peek_token() == Token::LParen { + Some(ConflictTarget::Columns( + self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, + )) + } else { + None + }; + + self.expect_keyword(Keyword::DO)?; + let action = if self.parse_keyword(Keyword::NOTHING) { + OnConflictAction::DoNothing + } else { + self.expect_keyword(Keyword::UPDATE)?; + self.expect_keyword(Keyword::SET)?; + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + OnConflictAction::DoUpdate(DoUpdate { + assignments, + selection, + }) + }; + + Some(OnInsert::OnConflict(OnConflict { + conflict_target, + action, + })) + } else { + self.expect_keyword(Keyword::DUPLICATE)?; + self.expect_keyword(Keyword::KEY)?; + self.expect_keyword(Keyword::UPDATE)?; + let l = self.parse_comma_separated(Parser::parse_assignment)?; + + Some(OnInsert::DuplicateKeyUpdate(l)) + } + } else { + None + }; + + let returning = if self.parse_keyword(Keyword::RETURNING) { + Some(self.parse_comma_separated(Parser::parse_select_item)?) 
+            } else {
+                None
+            };
+
+            Ok(Statement::Insert(Insert {
+                or,
+                table_name,
+                table_alias,
+                ignore,
+                into,
+                overwrite,
+                partitioned,
+                columns,
+                after_columns,
+                source,
+                table,
+                on,
+                returning,
+                replace_into,
+                priority,
+                insert_alias,
+            }))
+        }
+    }
+
+    pub(crate) fn parse_conflict_clause(&mut self) -> Option<SqliteOnConflict> {
+        if self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]) {
+            Some(SqliteOnConflict::Replace)
+        } else if self.parse_keywords(&[Keyword::OR, Keyword::ROLLBACK]) {
+            Some(SqliteOnConflict::Rollback)
+        } else if self.parse_keywords(&[Keyword::OR, Keyword::ABORT]) {
+            Some(SqliteOnConflict::Abort)
+        } else if self.parse_keywords(&[Keyword::OR, Keyword::FAIL]) {
+            Some(SqliteOnConflict::Fail)
+        } else if self.parse_keywords(&[Keyword::OR, Keyword::IGNORE]) {
+            Some(SqliteOnConflict::Ignore)
+        } else if self.parse_keyword(Keyword::REPLACE) {
+            Some(SqliteOnConflict::Replace)
+        } else {
+            None
+        }
+    }
+
+    pub fn parse_insert_partition(&mut self) -> Result<Option<Vec<Expr>>, ParserError> {
+        if self.parse_keyword(Keyword::PARTITION) {
+            self.expect_token(&Token::LParen)?;
+            let partition_cols = Some(self.parse_comma_separated(Parser::parse_expr)?);
+            self.expect_token(&Token::RParen)?;
+            Ok(partition_cols)
+        } else {
+            Ok(None)
+        }
+    }
+}
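parse_insert covers the MySQL, Hive, and PostgreSQL variants in one function; the ON CONFLICT and RETURNING paths can be checked like so. A sketch, not part of the patch (the table and column names are made up):

```rust
use sqlparser::ast::Statement;
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = PostgreSqlDialect {};
    let sql = "INSERT INTO t (id, v) VALUES (1, 'a') \
               ON CONFLICT (id) DO UPDATE SET v = 'b' RETURNING id";
    let stmt = Parser::new(&dialect)
        .try_with_sql(sql)
        .unwrap()
        .parse_statement()
        .unwrap();
    assert!(matches!(stmt, Statement::Insert(_)));
}
```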
diff --git a/src/parser/install.rs b/src/parser/install.rs
new file mode 100644
index 000000000..92f3c679a
--- /dev/null
+++ b/src/parser/install.rs
@@ -0,0 +1,10 @@
+use crate::parser::*;
+
+impl<'a> Parser<'a> {
+    /// `INSTALL [extension_name]`
+    pub fn parse_install(&mut self) -> Result<Statement, ParserError> {
+        let extension_name = self.parse_identifier(false)?;
+
+        Ok(Statement::Install { extension_name })
+    }
+}
diff --git a/src/parser/keyword.rs b/src/parser/keyword.rs
new file mode 100644
index 000000000..609cadccd
--- /dev/null
+++ b/src/parser/keyword.rs
@@ -0,0 +1,22 @@
+use crate::parser::*;
+
+impl<'a> Parser<'a> {
+    /// Parse a keyword-separated list of 1+ items accepted by `F`
+    pub fn parse_keyword_separated<T, F>(
+        &mut self,
+        keyword: Keyword,
+        mut f: F,
+    ) -> Result<Vec<T>, ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+    {
+        let mut values = vec![];
+        loop {
+            values.push(f(self)?);
+            if !self.parse_keyword(keyword) {
+                break;
+            }
+        }
+        Ok(values)
+    }
+}
diff --git a/src/parser/kill.rs b/src/parser/kill.rs
new file mode 100644
index 000000000..b86d04b21
--- /dev/null
+++ b/src/parser/kill.rs
@@ -0,0 +1,29 @@
+use crate::parser::*;
+
+impl<'a> Parser<'a> {
+    // KILL [CONNECTION | QUERY | MUTATION] processlist_id
+    pub fn parse_kill(&mut self) -> Result<Statement, ParserError> {
+        let modifier_keyword =
+            self.parse_one_of_keywords(&[Keyword::CONNECTION, Keyword::QUERY, Keyword::MUTATION]);
+
+        let id = self.parse_literal_uint()?;
+
+        let modifier = match modifier_keyword {
+            Some(Keyword::CONNECTION) => Some(KillType::Connection),
+            Some(Keyword::QUERY) => Some(KillType::Query),
+            Some(Keyword::MUTATION) => {
+                if dialect_of!(self is ClickHouseDialect | GenericDialect) {
+                    Some(KillType::Mutation)
+                } else {
+                    self.expected(
+                        "Unsupported type for KILL, allowed: CONNECTION | QUERY",
+                        self.peek_token(),
+                    )?
+                }
+            }
+            _ => None,
+        };
+
+        Ok(Statement::Kill { modifier, id })
+    }
+}
diff --git a/src/parser/listen.rs b/src/parser/listen.rs
new file mode 100644
index 000000000..bf87ab8ae
--- /dev/null
+++ b/src/parser/listen.rs
@@ -0,0 +1,8 @@
+use crate::parser::*;
+
+impl<'a> Parser<'a> {
+    pub fn parse_listen(&mut self) -> Result<Statement, ParserError> {
+        let channel = self.parse_identifier(false)?;
+        Ok(Statement::LISTEN { channel })
+    }
+}
diff --git a/src/parser/lists.rs b/src/parser/lists.rs
new file mode 100644
index 000000000..731fd64b8
--- /dev/null
+++ b/src/parser/lists.rs
@@ -0,0 +1,100 @@
+use crate::parser::*;
+
+impl<'a> Parser<'a> {
+    /// Parse the comma of a comma-separated syntax element.
+    /// Allows for control over trailing commas
+    /// Returns true if there is a next element
+    fn is_parse_comma_separated_end_with_trailing_commas(&mut self, trailing_commas: bool) -> bool {
+        if !self.consume_token(&Token::Comma) {
+            true
+        } else if trailing_commas {
+            let token = self.peek_token().token;
+            match token {
+                Token::Word(ref kw)
+                    if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) =>
+                {
+                    true
+                }
+                Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => {
+                    true
+                }
+                _ => false,
+            }
+        } else {
+            false
+        }
+    }
+
+    /// Parse the comma of a comma-separated syntax element.
+    /// Returns true if there is a next element
+    pub(crate) fn is_parse_comma_separated_end(&mut self) -> bool {
+        self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas)
+    }
+
+    /// Parse a comma-separated list of 1+ items accepted by `F`
+    pub fn parse_comma_separated<T, F>(&mut self, f: F) -> Result<Vec<T>, ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+    {
+        self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas)
+    }
+
+    /// Parse a comma-separated list of 1+ items accepted by `F`
+    /// Allows for control over trailing commas
+    pub(crate) fn parse_comma_separated_with_trailing_commas<T, F>(
+        &mut self,
+        mut f: F,
+        trailing_commas: bool,
+    ) -> Result<Vec<T>, ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+    {
+        let mut values = vec![];
+        loop {
+            values.push(f(self)?);
+            if self.is_parse_comma_separated_end_with_trailing_commas(trailing_commas) {
+                break;
+            }
+        }
+        Ok(values)
+    }
+
+    pub fn parse_parenthesized<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+    {
+        self.expect_token(&Token::LParen)?;
+        let res = f(self)?;
+        self.expect_token(&Token::RParen)?;
+        Ok(res)
+    }
+
+    /// Parse a comma-separated list of 0+ items accepted by `F`
+    /// * `end_token` - expected end token for the closure (e.g. [Token::RParen], [Token::RBrace] ...)
+    pub fn parse_comma_separated0<T, F>(
+        &mut self,
+        f: F,
+        end_token: Token,
+    ) -> Result<Vec<T>, ParserError>
+    where
+        F: FnMut(&mut Parser<'a>) -> Result<T, ParserError>,
+    {
+        if self.peek_token().token == end_token {
+            return Ok(vec![]);
+        }
+
+        if self.options.trailing_commas && self.peek_tokens() == [Token::Comma, end_token] {
+            let _ = self.consume_token(&Token::Comma);
+            return Ok(vec![]);
+        }
+
+        self.parse_comma_separated(f)
+    }
+
+    pub(crate) fn parse_parenthesized_identifiers(&mut self) -> Result<Vec<Ident>, ParserError> {
+        self.expect_token(&Token::LParen)?;
+        let partitions = self.parse_comma_separated(|p| p.parse_identifier(false))?;
+        self.expect_token(&Token::RParen)?;
+        Ok(partitions)
+    }
+}
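The trailing-comma handling in lists.rs is driven by ParserOptions rather than by a dialect method, so it can be toggled explicitly. A sketch, not part of the patch, assuming made-up column names:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::{Parser, ParserOptions};

fn main() {
    let dialect = GenericDialect {};
    // FROM is in RESERVED_FOR_COLUMN_ALIAS, so with trailing commas enabled
    // the dangling comma ends the projection list instead of erroring.
    let sql = "SELECT a, b, FROM t";
    let result = Parser::new(&dialect)
        .with_options(ParserOptions::new().with_trailing_commas(true))
        .try_with_sql(sql)
        .unwrap()
        .parse_statements();
    assert!(result.is_ok());
}
```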
diff --git a/src/parser/load.rs b/src/parser/load.rs
new file mode 100644
index 000000000..d458b4118
--- /dev/null
+++ b/src/parser/load.rs
@@ -0,0 +1,50 @@
+use crate::parser::*;
+
+impl<'a> Parser<'a> {
+    /// Parse a SQL LOAD statement
+    pub fn parse_load(&mut self) -> Result<Statement, ParserError> {
+        if self.dialect.supports_load_extension() {
+            let extension_name = self.parse_identifier(false)?;
+            Ok(Statement::Load { extension_name })
+        } else if self.parse_keyword(Keyword::DATA) && self.dialect.supports_load_data() {
+            let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some();
+            self.expect_keyword(Keyword::INPATH)?;
+            let inpath = self.parse_literal_string()?;
+            let overwrite = self.parse_one_of_keywords(&[Keyword::OVERWRITE]).is_some();
+            self.expect_keyword(Keyword::INTO)?;
+            self.expect_keyword(Keyword::TABLE)?;
+            let table_name = self.parse_object_name(false)?;
+            let partitioned = self.parse_insert_partition()?;
+            let table_format = self.parse_load_data_table_format()?;
+            Ok(Statement::LoadData {
+                local,
+                inpath,
+                overwrite,
+                table_name,
+                partitioned,
+                table_format,
+            })
+        } else {
+            self.expected(
+                "`DATA` or an extension name after `LOAD`",
+                self.peek_token(),
+            )
+        }
+    }
+
+    pub fn parse_load_data_table_format(
+        &mut self,
+    ) -> Result<Option<HiveLoadDataFormat>, ParserError> {
+        if self.parse_keyword(Keyword::INPUTFORMAT) {
+            let input_format = self.parse_expr()?;
+            self.expect_keyword(Keyword::SERDE)?;
+            let serde = self.parse_expr()?;
+            Ok(Some(HiveLoadDataFormat {
+                input_format,
+                serde,
+            }))
+        } else {
+            Ok(None)
+        }
+    }
+}
diff --git a/src/parser/merge.rs b/src/parser/merge.rs
new file mode 100644
index 000000000..62497ea35
--- /dev/null
+++ b/src/parser/merge.rs
@@ -0,0 +1,123 @@
+use crate::parser::*;
+
+impl<'a> Parser<'a> {
+    pub fn parse_merge(&mut self) -> Result<Statement, ParserError> {
+        let into = self.parse_keyword(Keyword::INTO);
+
+        let table = self.parse_table_factor()?;
+
+        self.expect_keyword(Keyword::USING)?;
+        let source = self.parse_table_factor()?;
+        self.expect_keyword(Keyword::ON)?;
+        let on = self.parse_expr()?;
+        let clauses = self.parse_merge_clauses()?;
+
+        Ok(Statement::Merge {
+            into,
+            table,
+            source,
+            on: Box::new(on),
+            clauses,
+        })
+    }
+
+    pub fn parse_merge_clauses(&mut self) -> Result<Vec<MergeClause>, ParserError> {
+        let mut clauses = vec![];
+        loop {
+            if self.peek_token() == Token::EOF || self.peek_token() == Token::SemiColon {
+                break;
+            }
+            self.expect_keyword(Keyword::WHEN)?;
+
+            let mut clause_kind = MergeClauseKind::Matched;
+            if self.parse_keyword(Keyword::NOT) {
+                clause_kind = MergeClauseKind::NotMatched;
+            }
+            self.expect_keyword(Keyword::MATCHED)?;
+
+            if matches!(clause_kind, MergeClauseKind::NotMatched)
+                && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE])
+            {
+                clause_kind = MergeClauseKind::NotMatchedBySource;
+            } else if matches!(clause_kind, MergeClauseKind::NotMatched)
+                && self.parse_keywords(&[Keyword::BY, Keyword::TARGET])
+            {
+                clause_kind = MergeClauseKind::NotMatchedByTarget;
+            }
+
+            let predicate = if self.parse_keyword(Keyword::AND) {
+                Some(self.parse_expr()?)
+            } else {
+                None
+            };
+
+            self.expect_keyword(Keyword::THEN)?;
+
+            let merge_clause = match self.parse_one_of_keywords(&[
+                Keyword::UPDATE,
+                Keyword::INSERT,
+                Keyword::DELETE,
+            ]) {
+                Some(Keyword::UPDATE) => {
+                    if matches!(
+                        clause_kind,
+                        MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget
+                    ) {
+                        return Err(ParserError::ParserError(format!(
+                            "UPDATE is not allowed in a {clause_kind} merge clause"
+                        )));
+                    }
+                    self.expect_keyword(Keyword::SET)?;
+                    MergeAction::Update {
+                        assignments: self.parse_comma_separated(Parser::parse_assignment)?,
+                    }
+                }
+                Some(Keyword::DELETE) => {
+                    if matches!(
+                        clause_kind,
+                        MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget
+                    ) {
+                        return Err(ParserError::ParserError(format!(
+                            "DELETE is not allowed in a {clause_kind} merge clause"
+                        )));
+                    }
+                    MergeAction::Delete
+                }
+                Some(Keyword::INSERT) => {
+                    if !matches!(
+                        clause_kind,
+                        MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget
+                    ) {
+                        return Err(ParserError::ParserError(format!(
+                            "INSERT is not allowed in a {clause_kind} merge clause"
+                        )));
+                    }
+                    let is_mysql = dialect_of!(self is MySqlDialect);
+
+                    let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?;
+                    let kind = if dialect_of!(self is BigQueryDialect | GenericDialect)
+                        && self.parse_keyword(Keyword::ROW)
+                    {
+                        MergeInsertKind::Row
+                    } else {
+                        self.expect_keyword(Keyword::VALUES)?;
+                        let values = self.parse_values(is_mysql)?;
+                        MergeInsertKind::Values(values)
+                    };
+                    MergeAction::Insert(MergeInsertExpr { columns, kind })
+                }
+                _ => {
+                    return Err(ParserError::ParserError(
+                        "expected UPDATE, DELETE or INSERT in merge clause".to_string(),
+                    ));
+                }
+            };
+            clauses.push(MergeClause {
+                clause_kind,
+                predicate,
+                action: merge_clause,
+            });
+        }
+        Ok(clauses)
+    }
+}
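parse_merge_clauses enforces which actions are legal per clause kind (e.g. INSERT only when NOT MATCHED). A sketch, not part of the patch, with made-up table names:

```rust
use sqlparser::ast::Statement;
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let sql = "MERGE INTO t USING s ON t.id = s.id \
               WHEN MATCHED THEN UPDATE SET v = s.v \
               WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)";
    let stmt = Parser::new(&dialect)
        .try_with_sql(sql)
        .unwrap()
        .parse_statement()
        .unwrap();
    assert!(matches!(stmt, Statement::Merge { .. }));
}
```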
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index ac76f6484..382c5b730 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -36,10 +36,71 @@ use crate::ast::helpers::stmt_create_table::{CreateTableBuilder, CreateTableConf
 use crate::ast::Statement::CreatePolicy;
 use crate::ast::*;
 use crate::dialect::*;
-use crate::keywords::{Keyword, ALL_KEYWORDS};
+use crate::keywords::Keyword;
 use crate::tokenizer::*;
 
 mod alter;
+mod analyze;
+mod assert;
+mod assignment;
+mod attach;
+mod cache;
+mod call;
+mod close;
+mod columns;
+mod comment;
+mod commit;
+mod copy;
+mod create;
+mod deallocate;
+mod declare;
+mod delete;
+mod dialects;
+mod discard;
+mod drop;
+mod end;
+mod execute;
+mod explain;
+mod expr;
+mod fetch;
+mod flush;
+mod grant;
+mod identifier;
+mod insert;
+mod install;
+mod keyword;
+mod kill;
+mod listen;
+mod lists;
+mod load;
+mod merge;
+mod msck;
+mod notify;
+mod optimize;
+mod options;
+mod pragma;
+mod prepare;
+mod release;
+mod replace;
+mod revoke;
+mod rollback;
+mod savepoint;
+mod select;
+mod set;
+mod show;
+mod start;
+mod tokens;
+mod truncate;
+mod uncache;
+mod unlisten;
+mod unload;
+mod update;
+mod r#use;
+mod value;
+mod window;
+
+#[cfg(test)]
+mod tests;
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum ParserError {
@@ -52,6 +113,7 @@ pub enum ParserError {
 
 type ParsedAction = (Keyword, Option<Vec<Ident>>);
 
 // Use `Parser::expected` instead, if possible
+#[macro_export]
 macro_rules!
parser_err { ($MSG:expr, $loc:expr) => { Err(ParserError::ParserError(format!("{}{}", $MSG, $loc))) @@ -198,7 +260,7 @@ const DEFAULT_REMAINING_DEPTH: usize = 50; /// child type. /// /// See [Parser::parse_data_type] for details -struct MatchedTrailingBracket(bool); +pub(crate) struct MatchedTrailingBracket(bool); impl From for MatchedTrailingBracket { fn from(value: bool) -> Self { @@ -392,6 +454,11 @@ impl<'a> Parser<'a> { Ok(self.with_tokens_with_locations(tokens)) } + /// The index of the first unprocessed token. + pub fn index(&self) -> usize { + self.index + } + /// Parse potentially multiple statements /// /// Example @@ -562,13093 +629,73 @@ impl<'a> Parser<'a> { } } - pub fn parse_comment(&mut self) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - - self.expect_keyword(Keyword::ON)?; - let token = self.next_token(); - - let (object_type, object_name) = match token.token { - Token::Word(w) if w.keyword == Keyword::COLUMN => { - (CommentObject::Column, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::TABLE => { - (CommentObject::Table, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::EXTENSION => { - (CommentObject::Extension, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::SCHEMA => { - (CommentObject::Schema, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::DATABASE => { - (CommentObject::Database, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::USER => { - (CommentObject::User, self.parse_object_name(false)?) - } - Token::Word(w) if w.keyword == Keyword::ROLE => { - (CommentObject::Role, self.parse_object_name(false)?) - } - _ => self.expected("comment object_type", token)?, - }; - - self.expect_keyword(Keyword::IS)?; - let comment = if self.parse_keyword(Keyword::NULL) { - None - } else { - Some(self.parse_literal_string()?) 
- }; - Ok(Statement::Comment { - object_type, - object_name, - comment, - if_exists, - }) - } - - pub fn parse_flush(&mut self) -> Result { - let mut channel = None; - let mut tables: Vec = vec![]; - let mut read_lock = false; - let mut export = false; - - if !dialect_of!(self is MySqlDialect | GenericDialect) { - return parser_err!("Unsupported statement FLUSH", self.peek_token().span.start); - } - - let location = if self.parse_keyword(Keyword::NO_WRITE_TO_BINLOG) { - Some(FlushLocation::NoWriteToBinlog) - } else if self.parse_keyword(Keyword::LOCAL) { - Some(FlushLocation::Local) - } else { - None - }; - - let object_type = if self.parse_keywords(&[Keyword::BINARY, Keyword::LOGS]) { - FlushType::BinaryLogs - } else if self.parse_keywords(&[Keyword::ENGINE, Keyword::LOGS]) { - FlushType::EngineLogs - } else if self.parse_keywords(&[Keyword::ERROR, Keyword::LOGS]) { - FlushType::ErrorLogs - } else if self.parse_keywords(&[Keyword::GENERAL, Keyword::LOGS]) { - FlushType::GeneralLogs - } else if self.parse_keywords(&[Keyword::HOSTS]) { - FlushType::Hosts - } else if self.parse_keyword(Keyword::PRIVILEGES) { - FlushType::Privileges - } else if self.parse_keyword(Keyword::OPTIMIZER_COSTS) { - FlushType::OptimizerCosts - } else if self.parse_keywords(&[Keyword::RELAY, Keyword::LOGS]) { - if self.parse_keywords(&[Keyword::FOR, Keyword::CHANNEL]) { - channel = Some(self.parse_object_name(false).unwrap().to_string()); - } - FlushType::RelayLogs - } else if self.parse_keywords(&[Keyword::SLOW, Keyword::LOGS]) { - FlushType::SlowLogs - } else if self.parse_keyword(Keyword::STATUS) { - FlushType::Status - } else if self.parse_keyword(Keyword::USER_RESOURCES) { - FlushType::UserResources - } else if self.parse_keywords(&[Keyword::LOGS]) { - FlushType::Logs - } else if self.parse_keywords(&[Keyword::TABLES]) { - loop { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::WITH => { - read_lock = self.parse_keywords(&[Keyword::READ, Keyword::LOCK]); - } - Keyword::FOR => { - export = self.parse_keyword(Keyword::EXPORT); - } - Keyword::NoKeyword => { - self.prev_token(); - tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; - } - _ => {} - }, - _ => { - break; - } - } - } - - FlushType::Tables - } else { - return self.expected( - "BINARY LOGS, ENGINE LOGS, ERROR LOGS, GENERAL LOGS, HOSTS, LOGS, PRIVILEGES, OPTIMIZER_COSTS,\ - RELAY LOGS [FOR CHANNEL channel], SLOW LOGS, STATUS, USER_RESOURCES", - self.peek_token(), - ); - }; - - Ok(Statement::Flush { - object_type, - location, - channel, - read_lock, - export, - tables, - }) - } - - pub fn parse_msck(&mut self) -> Result { - let repair = self.parse_keyword(Keyword::REPAIR); - self.expect_keyword(Keyword::TABLE)?; - let table_name = self.parse_object_name(false)?; - let partition_action = self - .maybe_parse(|parser| { - let pa = match parser.parse_one_of_keywords(&[ - Keyword::ADD, - Keyword::DROP, - Keyword::SYNC, - ]) { - Some(Keyword::ADD) => Some(AddDropSync::ADD), - Some(Keyword::DROP) => Some(AddDropSync::DROP), - Some(Keyword::SYNC) => Some(AddDropSync::SYNC), - _ => None, - }; - parser.expect_keyword(Keyword::PARTITIONS)?; - Ok(pa) - })? - .unwrap_or_default(); - Ok(Statement::Msck { - repair, - table_name, - partition_action, - }) - } - - pub fn parse_truncate(&mut self) -> Result { - let table = self.parse_keyword(Keyword::TABLE); - let only = self.parse_keyword(Keyword::ONLY); - - let table_names = self - .parse_comma_separated(|p| p.parse_object_name(false))? 
- .into_iter() - .map(|n| TruncateTableTarget { name: n }) - .collect(); - - let mut partitions = None; - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - } - - let mut identity = None; - let mut cascade = None; - - if dialect_of!(self is PostgreSqlDialect | GenericDialect) { - identity = if self.parse_keywords(&[Keyword::RESTART, Keyword::IDENTITY]) { - Some(TruncateIdentityOption::Restart) - } else if self.parse_keywords(&[Keyword::CONTINUE, Keyword::IDENTITY]) { - Some(TruncateIdentityOption::Continue) - } else { - None - }; - - cascade = if self.parse_keyword(Keyword::CASCADE) { - Some(TruncateCascadeOption::Cascade) - } else if self.parse_keyword(Keyword::RESTRICT) { - Some(TruncateCascadeOption::Restrict) - } else { - None - }; - }; - - let on_cluster = self.parse_optional_on_cluster()?; - - Ok(Statement::Truncate { - table_names, - partitions, - table, - only, - identity, - cascade, - on_cluster, - }) - } - - pub fn parse_attach_duckdb_database_options( - &mut self, - ) -> Result, ParserError> { - if !self.consume_token(&Token::LParen) { - return Ok(vec![]); - } - - let mut options = vec![]; - loop { - if self.parse_keyword(Keyword::READ_ONLY) { - let boolean = if self.parse_keyword(Keyword::TRUE) { - Some(true) - } else if self.parse_keyword(Keyword::FALSE) { - Some(false) - } else { - None - }; - options.push(AttachDuckDBDatabaseOption::ReadOnly(boolean)); - } else if self.parse_keyword(Keyword::TYPE) { - let ident = self.parse_identifier(false)?; - options.push(AttachDuckDBDatabaseOption::Type(ident)); - } else { - return self.expected("expected one of: ), READ_ONLY, TYPE", self.peek_token()); - }; - - if self.consume_token(&Token::RParen) { - return Ok(options); - } else if self.consume_token(&Token::Comma) { - continue; - } else { - return self.expected("expected one of: ')', ','", self.peek_token()); - } - } - } - - pub fn parse_attach_duckdb_database(&mut self) -> Result { - let database = self.parse_keyword(Keyword::DATABASE); - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let database_path = self.parse_identifier(false)?; - let database_alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier(false)?) 
- } else { - None - }; - - let attach_options = self.parse_attach_duckdb_database_options()?; - Ok(Statement::AttachDuckDBDatabase { - if_not_exists, - database, - database_path, - database_alias, - attach_options, - }) - } - - pub fn parse_detach_duckdb_database(&mut self) -> Result { - let database = self.parse_keyword(Keyword::DATABASE); - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let database_alias = self.parse_identifier(false)?; - Ok(Statement::DetachDuckDBDatabase { - if_exists, - database, - database_alias, - }) - } - - pub fn parse_attach_database(&mut self) -> Result { - let database = self.parse_keyword(Keyword::DATABASE); - let database_file_name = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let schema_name = self.parse_identifier(false)?; - Ok(Statement::AttachDatabase { - database, - schema_name, - database_file_name, + fn parse(s: String, loc: Location) -> Result + where + ::Err: Display, + { + s.parse::().map_err(|e| { + ParserError::ParserError(format!( + "Could not parse '{s}' as {}: {e}{loc}", + core::any::type_name::() + )) }) } - pub fn parse_analyze(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let table_name = self.parse_object_name(false)?; - let mut for_columns = false; - let mut cache_metadata = false; - let mut noscan = false; - let mut partitions = None; - let mut compute_statistics = false; - let mut columns = vec![]; - loop { - match self.parse_one_of_keywords(&[ - Keyword::PARTITION, - Keyword::FOR, - Keyword::CACHE, - Keyword::NOSCAN, - Keyword::COMPUTE, - ]) { - Some(Keyword::PARTITION) => { - self.expect_token(&Token::LParen)?; - partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - } - Some(Keyword::NOSCAN) => noscan = true, - Some(Keyword::FOR) => { - self.expect_keyword(Keyword::COLUMNS)?; - - columns = self - .maybe_parse(|parser| { - parser.parse_comma_separated(|p| p.parse_identifier(false)) - })? - .unwrap_or_default(); - for_columns = true - } - Some(Keyword::CACHE) => { - self.expect_keyword(Keyword::METADATA)?; - cache_metadata = true - } - Some(Keyword::COMPUTE) => { - self.expect_keyword(Keyword::STATISTICS)?; - compute_statistics = true - } - _ => break, - } + /// Run a parser method `f`, reverting back to the current position if unsuccessful. + /// Returns `None` if `f` returns an error + pub fn maybe_parse(&mut self, f: F) -> Result, ParserError> + where + F: FnMut(&mut Parser) -> Result, + { + match self.try_parse(f) { + Ok(t) => Ok(Some(t)), + Err(ParserError::RecursionLimitExceeded) => Err(ParserError::RecursionLimitExceeded), + _ => Ok(None), } - - Ok(Statement::Analyze { - table_name, - for_columns, - columns, - partitions, - cache_metadata, - noscan, - compute_statistics, - }) } - /// Parse a new expression including wildcard & qualified wildcard. - pub fn parse_wildcard_expr(&mut self) -> Result { + /// Run a parser method `f`, reverting back to the current position if unsuccessful. 
+ pub fn try_parse(&mut self, mut f: F) -> Result + where + F: FnMut(&mut Parser) -> Result, + { let index = self.index; - - let next_token = self.next_token(); - match next_token.token { - t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { - if self.peek_token().token == Token::Period { - let mut id_parts: Vec = vec![match t { - Token::Word(w) => w.to_ident(next_token.span), - Token::SingleQuotedString(s) => Ident::with_quote('\'', s), - _ => unreachable!(), // We matched above - }]; - - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), - Token::SingleQuotedString(s) => { - // SQLite has single-quoted identifiers - id_parts.push(Ident::with_quote('\'', s)) - } - Token::Mul => { - return Ok(Expr::QualifiedWildcard( - ObjectName(id_parts), - AttachedToken(next_token), - )); - } - _ => { - return self - .expected("an identifier or a '*' after '.'", next_token); - } - } - } - } - } - Token::Mul => { - return Ok(Expr::Wildcard(AttachedToken(next_token))); - } - _ => (), - }; - - self.index = index; - self.parse_expr() - } - - /// Parse a new expression. - pub fn parse_expr(&mut self) -> Result { - self.parse_subexpr(self.dialect.prec_unknown()) - } - - /// Parse tokens until the precedence changes. - pub fn parse_subexpr(&mut self, precedence: u8) -> Result { - let _guard = self.recursion_counter.try_decrease()?; - debug!("parsing expr"); - let mut expr = self.parse_prefix()?; - debug!("prefix: {:?}", expr); - loop { - let next_precedence = self.get_next_precedence()?; - debug!("next precedence: {:?}", next_precedence); - - if precedence >= next_precedence { - break; - } - - expr = self.parse_infix(expr, next_precedence)?; - } - Ok(expr) - } - - pub fn parse_assert(&mut self) -> Result { - let condition = self.parse_expr()?; - let message = if self.parse_keyword(Keyword::AS) { - Some(self.parse_expr()?) - } else { - None - }; - - Ok(Statement::Assert { condition, message }) - } - - pub fn parse_savepoint(&mut self) -> Result { - let name = self.parse_identifier(false)?; - Ok(Statement::Savepoint { name }) - } - - pub fn parse_release(&mut self) -> Result { - let _ = self.parse_keyword(Keyword::SAVEPOINT); - let name = self.parse_identifier(false)?; - - Ok(Statement::ReleaseSavepoint { name }) - } - - pub fn parse_listen(&mut self) -> Result { - let channel = self.parse_identifier(false)?; - Ok(Statement::LISTEN { channel }) - } - - pub fn parse_unlisten(&mut self) -> Result { - let channel = if self.consume_token(&Token::Mul) { - Ident::new(Expr::Wildcard(AttachedToken::empty()).to_string()) - } else { - match self.parse_identifier(false) { - Ok(expr) => expr, - _ => { - self.prev_token(); - return self.expected("wildcard or identifier", self.peek_token()); - } - } - }; - Ok(Statement::UNLISTEN { channel }) - } - - pub fn parse_notify(&mut self) -> Result { - let channel = self.parse_identifier(false)?; - let payload = if self.consume_token(&Token::Comma) { - Some(self.parse_literal_string()?) - } else { - None - }; - Ok(Statement::NOTIFY { channel, payload }) - } - - // Tries to parse an expression by matching the specified word to known keywords that have a special meaning in the dialect. - // Returns `None if no match is found. 
- fn parse_expr_prefix_by_reserved_word( - &mut self, - w: &Word, - w_span: Span, - ) -> Result, ParserError> { - match w.keyword { - Keyword::TRUE | Keyword::FALSE if self.dialect.supports_boolean_literals() => { - self.prev_token(); - Ok(Some(Expr::Value(self.parse_value()?))) - } - Keyword::NULL => { - self.prev_token(); - Ok(Some(Expr::Value(self.parse_value()?))) - } - Keyword::CURRENT_CATALOG - | Keyword::CURRENT_USER - | Keyword::SESSION_USER - | Keyword::USER - if dialect_of!(self is PostgreSqlDialect | GenericDialect) => - { - Ok(Some(Expr::Function(Function { - name: ObjectName(vec![w.to_ident(w_span)]), - parameters: FunctionArguments::None, - args: FunctionArguments::None, - null_treatment: None, - filter: None, - over: None, - within_group: vec![], - }))) - } - Keyword::CURRENT_TIMESTAMP - | Keyword::CURRENT_TIME - | Keyword::CURRENT_DATE - | Keyword::LOCALTIME - | Keyword::LOCALTIMESTAMP => { - Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident(w_span)]))?)) - } - Keyword::CASE => Ok(Some(self.parse_case_expr()?)), - Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)), - Keyword::TRY_CONVERT if self.dialect.supports_try_convert() => Ok(Some(self.parse_convert_expr(true)?)), - Keyword::CAST => Ok(Some(self.parse_cast_expr(CastKind::Cast)?)), - Keyword::TRY_CAST => Ok(Some(self.parse_cast_expr(CastKind::TryCast)?)), - Keyword::SAFE_CAST => Ok(Some(self.parse_cast_expr(CastKind::SafeCast)?)), - Keyword::EXISTS - // Support parsing Databricks has a function named `exists`. - if !dialect_of!(self is DatabricksDialect) - || matches!( - self.peek_nth_token(1).token, - Token::Word(Word { - keyword: Keyword::SELECT | Keyword::WITH, - .. - }) - ) => - { - Ok(Some(self.parse_exists_expr(false)?)) - } - Keyword::EXTRACT => Ok(Some(self.parse_extract_expr()?)), - Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)), - Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)), - Keyword::POSITION if self.peek_token().token == Token::LParen => { - Ok(Some(self.parse_position_expr(w.to_ident(w_span))?)) - } - Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)), - Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)), - Keyword::TRIM => Ok(Some(self.parse_trim_expr()?)), - Keyword::INTERVAL => Ok(Some(self.parse_interval()?)), - // Treat ARRAY[1,2,3] as an array [1,2,3], otherwise try as subquery or a function call - Keyword::ARRAY if self.peek_token() == Token::LBracket => { - self.expect_token(&Token::LBracket)?; - Ok(Some(self.parse_array_expr(true)?)) - } - Keyword::ARRAY - if self.peek_token() == Token::LParen - && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) => - { - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - Ok(Some(Expr::Function(Function { - name: ObjectName(vec![w.to_ident(w_span)]), - parameters: FunctionArguments::None, - args: FunctionArguments::Subquery(query), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - }))) - } - Keyword::NOT => Ok(Some(self.parse_not()?)), - Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { - Ok(Some(self.parse_match_against()?)) - } - Keyword::STRUCT if self.dialect.supports_struct_literal() => { - Ok(Some(self.parse_struct_literal()?)) - } - Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => { - let expr = self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?; - Ok(Some(Expr::Prior(Box::new(expr)))) - } - Keyword::MAP if self.peek_token() == 
Token::LBrace && self.dialect.support_map_literal_syntax() => { - Ok(Some(self.parse_duckdb_map_literal()?)) - } - _ => Ok(None) - } - } - - // Tries to parse an expression by a word that is not known to have a special meaning in the dialect. - fn parse_expr_prefix_by_unreserved_word( - &mut self, - w: &Word, - w_span: Span, - ) -> Result { - match self.peek_token().token { - Token::LParen | Token::Period => { - let mut id_parts: Vec = vec![w.to_ident(w_span)]; - let mut ending_wildcard: Option = None; - while self.consume_token(&Token::Period) { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => id_parts.push(w.to_ident(next_token.span)), - Token::Mul => { - // Postgres explicitly allows funcnm(tablenm.*) and the - // function array_agg traverses this control flow - if dialect_of!(self is PostgreSqlDialect) { - ending_wildcard = Some(next_token); - break; - } else { - return self.expected("an identifier after '.'", next_token); - } - } - Token::SingleQuotedString(s) => id_parts.push(Ident::with_quote('\'', s)), - _ => { - return self.expected("an identifier or a '*' after '.'", next_token); - } - } - } - - if let Some(wildcard_token) = ending_wildcard { - Ok(Expr::QualifiedWildcard( - ObjectName(id_parts), - AttachedToken(wildcard_token), - )) - } else if self.consume_token(&Token::LParen) { - if dialect_of!(self is SnowflakeDialect | MsSqlDialect) - && self.consume_tokens(&[Token::Plus, Token::RParen]) - { - Ok(Expr::OuterJoin(Box::new( - match <[Ident; 1]>::try_from(id_parts) { - Ok([ident]) => Expr::Identifier(ident), - Err(parts) => Expr::CompoundIdentifier(parts), - }, - ))) - } else { - self.prev_token(); - self.parse_function(ObjectName(id_parts)) - } - } else { - Ok(Expr::CompoundIdentifier(id_parts)) - } - } - // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html - Token::SingleQuotedString(_) - | Token::DoubleQuotedString(_) - | Token::HexStringLiteral(_) - if w.value.starts_with('_') => - { - Ok(Expr::IntroducedString { - introducer: w.value.clone(), - value: self.parse_introduced_string_value()?, - }) - } - Token::Arrow if self.dialect.supports_lambda_functions() => { - self.expect_token(&Token::Arrow)?; - Ok(Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::One(w.to_ident(w_span)), - body: Box::new(self.parse_expr()?), - })) + match f(self) { + Ok(t) => Ok(t), + Err(e) => { + // Unwind stack if limit exceeded + self.index = index; + Err(e) } - _ => Ok(Expr::Identifier(w.to_ident(w_span))), } } - /// Parse an expression prefix. - pub fn parse_prefix(&mut self) -> Result { - // allow the dialect to override prefix parsing - if let Some(prefix) = self.dialect.parse_prefix(self) { - return prefix; - } - - // PostgreSQL allows any string literal to be preceded by a type name, indicating that the - // string literal represents a literal of that type. Some examples: - // - // DATE '2020-05-20' - // TIMESTAMP WITH TIME ZONE '2020-05-20 7:43:54' - // BOOL 'true' - // - // The first two are standard SQL, while the latter is a PostgreSQL extension. Complicating - // matters is the fact that INTERVAL string literals may optionally be followed by special - // keywords, e.g.: - // - // INTERVAL '7' DAY - // - // Note also that naively `SELECT date` looks like a syntax error because the `date` type - // name is not followed by a string literal, but in fact in PostgreSQL it is a valid - // expression that should parse as the column name "date". 
- let loc = self.peek_token().span.start; - let opt_expr = self.maybe_parse(|parser| { - match parser.parse_data_type()? { - DataType::Interval => parser.parse_interval(), - // PostgreSQL allows almost any identifier to be used as custom data type name, - // and we support that in `parse_data_type()`. But unlike Postgres we don't - // have a list of globally reserved keywords (since they vary across dialects), - // so given `NOT 'a' LIKE 'b'`, we'd accept `NOT` as a possible custom data type - // name, resulting in `NOT 'a'` being recognized as a `TypedString` instead of - // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the - // `type 'string'` syntax for the custom data types at all. - DataType::Custom(..) => parser_err!("dummy", loc), - data_type => Ok(Expr::TypedString { - data_type, - value: parser.parse_literal_string()?, - }), - } - })?; - - if let Some(expr) = opt_expr { - return Ok(expr); - } - - let next_token = self.next_token(); - let expr = match next_token.token { - Token::Word(w) => { - // The word we consumed may fall into one of two cases: it has a special meaning, or not. - // For example, in Snowflake, the word `interval` may have two meanings depending on the context: - // `SELECT CURRENT_DATE() + INTERVAL '1 DAY', MAX(interval) FROM tbl;` - // ^^^^^^^^^^^^^^^^ ^^^^^^^^ - // interval expression identifier - // - // We first try to parse the word and following tokens as a special expression, and if that fails, - // we rollback and try to parse it as an identifier. - match self.try_parse(|parser| { - parser.parse_expr_prefix_by_reserved_word(&w, next_token.span) - }) { - // This word indicated an expression prefix and parsing was successful - Ok(Some(expr)) => Ok(expr), - - // No expression prefix associated with this word - Ok(None) => Ok(self.parse_expr_prefix_by_unreserved_word(&w, next_token.span)?), - - // If parsing of the word as a special expression failed, we are facing two options: - // 1. The statement is malformed, e.g. `SELECT INTERVAL '1 DAI` (`DAI` instead of `DAY`) - // 2. The word is used as an identifier, e.g. `SELECT MAX(interval) FROM tbl` - // We first try to parse the word as an identifier and if that fails - // we rollback and return the parsing error we got from trying to parse a - // special expression (to maintain backwards compatibility of parsing errors). 
- Err(e) => { - if !self.dialect.is_reserved_for_identifier(w.keyword) { - if let Ok(Some(expr)) = self.maybe_parse(|parser| { - parser.parse_expr_prefix_by_unreserved_word(&w, next_token.span) - }) { - return Ok(expr); - } - } - return Err(e); - } - } - } // End of Token::Word - // array `[1, 2, 3]` - Token::LBracket => self.parse_array_expr(false), - tok @ Token::Minus | tok @ Token::Plus => { - let op = if tok == Token::Plus { - UnaryOperator::Plus - } else { - UnaryOperator::Minus - }; - Ok(Expr::UnaryOp { - op, - expr: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::MulDivModOp))?, - ), - }) - } - Token::ExclamationMark if self.dialect.supports_bang_not_operator() => { - Ok(Expr::UnaryOp { - op: UnaryOperator::BangNot, - expr: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?, - ), - }) - } - tok @ Token::DoubleExclamationMark - | tok @ Token::PGSquareRoot - | tok @ Token::PGCubeRoot - | tok @ Token::AtSign - | tok @ Token::Tilde - if dialect_of!(self is PostgreSqlDialect) => - { - let op = match tok { - Token::DoubleExclamationMark => UnaryOperator::PGPrefixFactorial, - Token::PGSquareRoot => UnaryOperator::PGSquareRoot, - Token::PGCubeRoot => UnaryOperator::PGCubeRoot, - Token::AtSign => UnaryOperator::PGAbs, - Token::Tilde => UnaryOperator::PGBitwiseNot, - _ => unreachable!(), - }; - Ok(Expr::UnaryOp { - op, - expr: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?, - ), - }) - } - Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => - { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Token::UnicodeStringLiteral(_) => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Token::Number(_, _) - | Token::SingleQuotedString(_) - | Token::DoubleQuotedString(_) - | Token::TripleSingleQuotedString(_) - | Token::TripleDoubleQuotedString(_) - | Token::DollarQuotedString(_) - | Token::SingleQuotedByteStringLiteral(_) - | Token::DoubleQuotedByteStringLiteral(_) - | Token::TripleSingleQuotedByteStringLiteral(_) - | Token::TripleDoubleQuotedByteStringLiteral(_) - | Token::SingleQuotedRawStringLiteral(_) - | Token::DoubleQuotedRawStringLiteral(_) - | Token::TripleSingleQuotedRawStringLiteral(_) - | Token::TripleDoubleQuotedRawStringLiteral(_) - | Token::NationalStringLiteral(_) - | Token::HexStringLiteral(_) => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Token::LParen => { - let expr = if let Some(expr) = self.try_parse_expr_sub_query()? { - expr - } else if let Some(lambda) = self.try_parse_lambda()? 
{ - return Ok(lambda); - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - match exprs.len() { - 0 => unreachable!(), // parse_comma_separated ensures 1 or more - 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), - _ => Expr::Tuple(exprs), - } - }; - self.expect_token(&Token::RParen)?; - let expr = self.try_parse_method(expr)?; - if !self.consume_token(&Token::Period) { - Ok(expr) - } else { - let tok = self.next_token(); - let key = match tok.token { - Token::Word(word) => word.to_ident(tok.span), - _ => { - return parser_err!( - format!("Expected identifier, found: {tok}"), - tok.span.start - ) - } - }; - Ok(Expr::CompositeAccess { - expr: Box::new(expr), - key, - }) - } - } - Token::Placeholder(_) | Token::Colon | Token::AtSign => { - self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) - } - Token::LBrace if self.dialect.supports_dictionary_syntax() => { - self.prev_token(); - self.parse_duckdb_struct_literal() - } - _ => self.expected("an expression", next_token), - }?; - - let expr = self.try_parse_method(expr)?; - - if self.parse_keyword(Keyword::COLLATE) { - Ok(Expr::Collate { - expr: Box::new(expr), - collation: self.parse_object_name(false)?, - }) + fn parse_optional_on_cluster(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { + Ok(Some(self.parse_identifier(false)?)) } else { - Ok(expr) + Ok(None) } } - pub fn parse_utility_options(&mut self) -> Result, ParserError> { + // Apparently this is dead code? I assume its retained for public API + // compatibility or something of that nature. + pub fn parse_precision(&mut self) -> Result { self.expect_token(&Token::LParen)?; - let options = self.parse_comma_separated(Self::parse_utility_option)?; + let n = self.parse_literal_uint()?; self.expect_token(&Token::RParen)?; - - Ok(options) - } - - fn parse_utility_option(&mut self) -> Result { - let name = self.parse_identifier(false)?; - - let next_token = self.peek_token(); - if next_token == Token::Comma || next_token == Token::RParen { - return Ok(UtilityOption { name, arg: None }); - } - let arg = self.parse_expr()?; - - Ok(UtilityOption { - name, - arg: Some(arg), - }) - } - - fn try_parse_expr_sub_query(&mut self) -> Result, ParserError> { - if !self.peek_sub_query() { - return Ok(None); - } - - Ok(Some(Expr::Subquery(self.parse_query()?))) - } - - fn try_parse_lambda(&mut self) -> Result, ParserError> { - if !self.dialect.supports_lambda_functions() { - return Ok(None); - } - self.maybe_parse(|p| { - let params = p.parse_comma_separated(|p| p.parse_identifier(false))?; - p.expect_token(&Token::RParen)?; - p.expect_token(&Token::Arrow)?; - let expr = p.parse_expr()?; - Ok(Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::Many(params), - body: Box::new(expr), - })) - }) - } - - /// Parses method call expression - fn try_parse_method(&mut self, expr: Expr) -> Result { - if !self.dialect.supports_methods() { - return Ok(expr); - } - let method_chain = self.maybe_parse(|p| { - let mut method_chain = Vec::new(); - while p.consume_token(&Token::Period) { - let tok = p.next_token(); - let name = match tok.token { - Token::Word(word) => word.to_ident(tok.span), - _ => return p.expected("identifier", tok), - }; - let func = match p.parse_function(ObjectName(vec![name]))? 
{ - Expr::Function(func) => func, - _ => return p.expected("function", p.peek_token()), - }; - method_chain.push(func); - } - if !method_chain.is_empty() { - Ok(method_chain) - } else { - p.expected("function", p.peek_token()) - } - })?; - if let Some(method_chain) = method_chain { - Ok(Expr::Method(Method { - expr: Box::new(expr), - method_chain, - })) - } else { - Ok(expr) - } + Ok(n) } - pub fn parse_function(&mut self, name: ObjectName) -> Result { - self.expect_token(&Token::LParen)?; - - // Snowflake permits a subquery to be passed as an argument without - // an enclosing set of parens if it's the only argument. - if dialect_of!(self is SnowflakeDialect) && self.peek_sub_query() { - let subquery = self.parse_query()?; - self.expect_token(&Token::RParen)?; - return Ok(Expr::Function(Function { - name, - parameters: FunctionArguments::None, - args: FunctionArguments::Subquery(subquery), - filter: None, - null_treatment: None, - over: None, - within_group: vec![], - })); - } - - let mut args = self.parse_function_argument_list()?; - let mut parameters = FunctionArguments::None; - // ClickHouse aggregations support parametric functions like `HISTOGRAM(0.5, 0.6)(x, y)` - // which (0.5, 0.6) is a parameter to the function. - if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.consume_token(&Token::LParen) + /// Returns true if the next keyword indicates a sub query, i.e. SELECT or WITH + fn peek_sub_query(&mut self) -> bool { + if self + .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + .is_some() { - parameters = FunctionArguments::List(args); - args = self.parse_function_argument_list()?; - } - - let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) { - self.expect_token(&Token::LParen)?; - self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?; - let order_by = self.parse_comma_separated(Parser::parse_order_by_expr)?; - self.expect_token(&Token::RParen)?; - order_by - } else { - vec![] - }; - - let filter = if self.dialect.supports_filter_during_aggregation() - && self.parse_keyword(Keyword::FILTER) - && self.consume_token(&Token::LParen) - && self.parse_keyword(Keyword::WHERE) - { - let filter = Some(Box::new(self.parse_expr()?)); - self.expect_token(&Token::RParen)?; - filter - } else { - None - }; - - // Syntax for null treatment shows up either in the args list - // or after the function call, but not both. - let null_treatment = if args - .clauses - .iter() - .all(|clause| !matches!(clause, FunctionArgumentClause::IgnoreOrRespectNulls(_))) - { - self.parse_null_treatment()? - } else { - None - }; - - let over = if self.parse_keyword(Keyword::OVER) { - if self.consume_token(&Token::LParen) { - let window_spec = self.parse_window_spec()?; - Some(WindowType::WindowSpec(window_spec)) - } else { - Some(WindowType::NamedWindow(self.parse_identifier(false)?)) - } - } else { - None - }; - - Ok(Expr::Function(Function { - name, - parameters, - args: FunctionArguments::List(args), - null_treatment, - filter, - over, - within_group, - })) - } - - /// Optionally parses a null treatment clause. 
- fn parse_null_treatment(&mut self) -> Result<Option<NullTreatment>, ParserError> { - match self.parse_one_of_keywords(&[Keyword::RESPECT, Keyword::IGNORE]) { - Some(keyword) => { - self.expect_keyword(Keyword::NULLS)?; - - Ok(match keyword { - Keyword::RESPECT => Some(NullTreatment::RespectNulls), - Keyword::IGNORE => Some(NullTreatment::IgnoreNulls), - _ => None, - }) - } - None => Ok(None), - } - } - - pub fn parse_time_functions(&mut self, name: ObjectName) -> Result<Expr, ParserError> { - let args = if self.consume_token(&Token::LParen) { - FunctionArguments::List(self.parse_function_argument_list()?) - } else { - FunctionArguments::None - }; - Ok(Expr::Function(Function { - name, - parameters: FunctionArguments::None, - args, - filter: None, - over: None, - null_treatment: None, - within_group: vec![], - })) - } - - pub fn parse_window_frame_units(&mut self) -> Result<WindowFrameUnits, ParserError> { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::ROWS => Ok(WindowFrameUnits::Rows), - Keyword::RANGE => Ok(WindowFrameUnits::Range), - Keyword::GROUPS => Ok(WindowFrameUnits::Groups), - _ => self.expected("ROWS, RANGE, GROUPS", next_token)?, - }, - _ => self.expected("ROWS, RANGE, GROUPS", next_token), - } - } - - pub fn parse_window_frame(&mut self) -> Result<WindowFrame, ParserError> { - let units = self.parse_window_frame_units()?; - let (start_bound, end_bound) = if self.parse_keyword(Keyword::BETWEEN) { - let start_bound = self.parse_window_frame_bound()?; - self.expect_keyword(Keyword::AND)?; - let end_bound = Some(self.parse_window_frame_bound()?); - (start_bound, end_bound) - } else { - (self.parse_window_frame_bound()?, None) - }; - Ok(WindowFrame { - units, - start_bound, - end_bound, - }) - } - - /// Parse `CURRENT ROW` or `{ <positive number> | UNBOUNDED } { PRECEDING | FOLLOWING }` - pub fn parse_window_frame_bound(&mut self) -> Result<WindowFrameBound, ParserError> { - if self.parse_keywords(&[Keyword::CURRENT, Keyword::ROW]) { - Ok(WindowFrameBound::CurrentRow) - } else { - let rows = if self.parse_keyword(Keyword::UNBOUNDED) { - None - } else { - Some(Box::new(match self.peek_token().token { - Token::SingleQuotedString(_) => self.parse_interval()?, - _ => self.parse_expr()?, - })) - }; - if self.parse_keyword(Keyword::PRECEDING) { - Ok(WindowFrameBound::Preceding(rows)) - } else if self.parse_keyword(Keyword::FOLLOWING) { - Ok(WindowFrameBound::Following(rows)) - } else { - self.expected("PRECEDING or FOLLOWING", self.peek_token()) - } - } - } - - /// Parse a GROUP BY expression: one of GROUPING SETS, ROLLUP, CUBE, or a simple expression. - fn parse_group_by_expr(&mut self) -> Result<Expr, ParserError> { - if self.dialect.supports_group_by_expr() { - if self.parse_keywords(&[Keyword::GROUPING, Keyword::SETS]) { - self.expect_token(&Token::LParen)?; - let result = self.parse_comma_separated(|p| p.parse_tuple(false, true))?; - self.expect_token(&Token::RParen)?; - Ok(Expr::GroupingSets(result)) - } else if self.parse_keyword(Keyword::CUBE) { - self.expect_token(&Token::LParen)?; - let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Cube(result)) - } else if self.parse_keyword(Keyword::ROLLUP) { - self.expect_token(&Token::LParen)?; - let result = self.parse_comma_separated(|p| p.parse_tuple(true, true))?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Rollup(result)) - } else if self.consume_tokens(&[Token::LParen, Token::RParen]) { - // PostgreSQL allows using an empty tuple as a group by expression, - // e.g. `GROUP BY (), name`.
Please refer to the GROUP BY clause section in - // [PostgreSQL](https://www.postgresql.org/docs/16/sql-select.html) - Ok(Expr::Tuple(vec![])) - } else { - self.parse_expr() - } - } else { - // TODO parse rollup for other dialects - self.parse_expr() - } - } - - /// Parse a tuple with `(` and `)`. - /// If `lift_singleton` is true, then a singleton tuple is lifted to a tuple of length 1; otherwise parsing fails. - /// If `allow_empty` is true, then an empty tuple is allowed. - fn parse_tuple( - &mut self, - lift_singleton: bool, - allow_empty: bool, - ) -> Result<Vec<Expr>, ParserError> { - if lift_singleton { - if self.consume_token(&Token::LParen) { - let result = if allow_empty && self.consume_token(&Token::RParen) { - vec![] - } else { - let result = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - result - }; - Ok(result) - } else { - Ok(vec![self.parse_expr()?]) - } - } else { - self.expect_token(&Token::LParen)?; - let result = if allow_empty && self.consume_token(&Token::RParen) { - vec![] - } else { - let result = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - result - }; - Ok(result) - } - } - - pub fn parse_case_expr(&mut self) -> Result<Expr, ParserError> { - let mut operand = None; - if !self.parse_keyword(Keyword::WHEN) { - operand = Some(Box::new(self.parse_expr()?)); - self.expect_keyword(Keyword::WHEN)?; - } - let mut conditions = vec![]; - let mut results = vec![]; - loop { - conditions.push(self.parse_expr()?); - self.expect_keyword(Keyword::THEN)?; - results.push(self.parse_expr()?); - if !self.parse_keyword(Keyword::WHEN) { - break; - } - } - let else_result = if self.parse_keyword(Keyword::ELSE) { - Some(Box::new(self.parse_expr()?)) - } else { - None - }; - self.expect_keyword(Keyword::END)?; - Ok(Expr::Case { - operand, - conditions, - results, - else_result, - }) - } - - pub fn parse_optional_cast_format(&mut self) -> Result<Option<CastFormat>, ParserError> { - if self.parse_keyword(Keyword::FORMAT) { - let value = self.parse_value()?; - match self.parse_optional_time_zone()? { - Some(tz) => Ok(Some(CastFormat::ValueAtTimeZone(value, tz))), - None => Ok(Some(CastFormat::Value(value))), - } - } else { - Ok(None) - } - } - - pub fn parse_optional_time_zone(&mut self) -> Result<Option<Value>, ParserError> { - if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { - self.parse_value().map(Some) - } else { - Ok(None) - } - } - - /// MSSQL-style CONVERT function - fn parse_mssql_convert(&mut self, is_try: bool) -> Result<Expr, ParserError> { - self.expect_token(&Token::LParen)?; - let data_type = self.parse_data_type()?; - self.expect_token(&Token::Comma)?; - let expr = self.parse_expr()?; - let styles = if self.consume_token(&Token::Comma) { - self.parse_comma_separated(Parser::parse_expr)?
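Stepping back to `parse_group_by_expr` above for a moment: each branch corresponds to a surface syntax that can be exercised through the public API. A minimal sketch follows (the table and column names are illustrative, and it assumes a dialect for which `supports_group_by_expr` returns true, as `GenericDialect` is set up to do):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    // GROUPING SETS, CUBE, and ROLLUP each hit a different branch;
    // the bare `()` exercises the PostgreSQL-style empty tuple.
    for sql in [
        "SELECT a, b FROM t GROUP BY GROUPING SETS ((a), (b), ())",
        "SELECT a, b FROM t GROUP BY CUBE (a, b)",
        "SELECT a, b FROM t GROUP BY ROLLUP (a, b)",
        "SELECT a FROM t GROUP BY (), a",
    ] {
        Parser::parse_sql(&dialect, sql).unwrap();
    }
}
```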
- } else { - Default::default() - }; - self.expect_token(&Token::RParen)?; - Ok(Expr::Convert { - is_try, - expr: Box::new(expr), - data_type: Some(data_type), - charset: None, - target_before_value: true, - styles, - }) - } - - /// Parse a SQL CONVERT function: - /// - `CONVERT('héhé' USING utf8mb4)` (MySQL) - /// - `CONVERT('héhé', CHAR CHARACTER SET utf8mb4)` (MySQL) - /// - `CONVERT(DECIMAL(10, 5), 42)` (MSSQL) - the type comes first - pub fn parse_convert_expr(&mut self, is_try: bool) -> Result { - if self.dialect.convert_type_before_value() { - return self.parse_mssql_convert(is_try); - } - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - if self.parse_keyword(Keyword::USING) { - let charset = self.parse_object_name(false)?; - self.expect_token(&Token::RParen)?; - return Ok(Expr::Convert { - is_try, - expr: Box::new(expr), - data_type: None, - charset: Some(charset), - target_before_value: false, - styles: vec![], - }); - } - self.expect_token(&Token::Comma)?; - let data_type = self.parse_data_type()?; - let charset = if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { - Some(self.parse_object_name(false)?) - } else { - None - }; - self.expect_token(&Token::RParen)?; - Ok(Expr::Convert { - is_try, - expr: Box::new(expr), - data_type: Some(data_type), - charset, - target_before_value: false, - styles: vec![], - }) - } - - /// Parse a SQL CAST function e.g. `CAST(expr AS FLOAT)` - pub fn parse_cast_expr(&mut self, kind: CastKind) -> Result { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_keyword(Keyword::AS)?; - let data_type = self.parse_data_type()?; - let format = self.parse_optional_cast_format()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Cast { - kind, - expr: Box::new(expr), - data_type, - format, - }) - } - - /// Parse a SQL EXISTS expression e.g. `WHERE EXISTS(SELECT ...)`. - pub fn parse_exists_expr(&mut self, negated: bool) -> Result { - self.expect_token(&Token::LParen)?; - let exists_node = Expr::Exists { - negated, - subquery: self.parse_query()?, - }; - self.expect_token(&Token::RParen)?; - Ok(exists_node) - } - - pub fn parse_extract_expr(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let field = self.parse_date_time_field()?; - - let syntax = if self.parse_keyword(Keyword::FROM) { - ExtractSyntax::From - } else if self.consume_token(&Token::Comma) - && dialect_of!(self is SnowflakeDialect | GenericDialect) - { - ExtractSyntax::Comma - } else { - return Err(ParserError::ParserError( - "Expected 'FROM' or ','".to_string(), - )); - }; - - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Extract { - field, - expr: Box::new(expr), - syntax, - }) - } - - pub fn parse_ceil_floor_expr(&mut self, is_ceil: bool) -> Result { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - // Parse `CEIL/FLOOR(expr)` - let field = if self.parse_keyword(Keyword::TO) { - // Parse `CEIL/FLOOR(expr TO DateTimeField)` - CeilFloorKind::DateTimeField(self.parse_date_time_field()?) - } else if self.consume_token(&Token::Comma) { - // Parse `CEIL/FLOOR(expr, scale)` - match self.parse_value()? 
{ - Value::Number(n, s) => CeilFloorKind::Scale(Value::Number(n, s)), - _ => { - return Err(ParserError::ParserError( - "Scale field can only be of number type".to_string(), - )) - } - } - } else { - CeilFloorKind::DateTimeField(DateTimeField::NoDateTime) - }; - self.expect_token(&Token::RParen)?; - if is_ceil { - Ok(Expr::Ceil { - expr: Box::new(expr), - field, - }) - } else { - Ok(Expr::Floor { - expr: Box::new(expr), - field, - }) - } - } - - pub fn parse_position_expr(&mut self, ident: Ident) -> Result<Expr, ParserError> { - let between_prec = self.dialect.prec_value(Precedence::Between); - let position_expr = self.maybe_parse(|p| { - // Parse `SELECT POSITION('@' IN field)` - p.expect_token(&Token::LParen)?; - - // Parse the subexpr until the IN keyword - let expr = p.parse_subexpr(between_prec)?; - p.expect_keyword(Keyword::IN)?; - let from = p.parse_expr()?; - p.expect_token(&Token::RParen)?; - Ok(Expr::Position { - expr: Box::new(expr), - r#in: Box::new(from), - }) - })?; - match position_expr { - Some(expr) => Ok(expr), - // Snowflake supports `position` as an ordinary function call - // without the special `IN` syntax. - None => self.parse_function(ObjectName(vec![ident])), - } - } - - pub fn parse_substring_expr(&mut self) -> Result<Expr, ParserError> { - // Parse `SUBSTRING (EXPR [FROM 1] [FOR 3])` - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - let mut from_expr = None; - let special = self.consume_token(&Token::Comma); - if special || self.parse_keyword(Keyword::FROM) { - from_expr = Some(self.parse_expr()?); - } - - let mut to_expr = None; - if self.parse_keyword(Keyword::FOR) || self.consume_token(&Token::Comma) { - to_expr = Some(self.parse_expr()?); - } - self.expect_token(&Token::RParen)?; - - Ok(Expr::Substring { - expr: Box::new(expr), - substring_from: from_expr.map(Box::new), - substring_for: to_expr.map(Box::new), - special, - }) - } - - pub fn parse_overlay_expr(&mut self) -> Result<Expr, ParserError> { - // Parse `OVERLAY (EXPR PLACING EXPR FROM 1 [FOR 3])` - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_keyword(Keyword::PLACING)?; - let what_expr = self.parse_expr()?; - self.expect_keyword(Keyword::FROM)?; - let from_expr = self.parse_expr()?; - let mut for_expr = None; - if self.parse_keyword(Keyword::FOR) { - for_expr = Some(self.parse_expr()?); - } - self.expect_token(&Token::RParen)?; - - Ok(Expr::Overlay { - expr: Box::new(expr), - overlay_what: Box::new(what_expr), - overlay_from: Box::new(from_expr), - overlay_for: for_expr.map(Box::new), - }) - } - - /// ```sql - /// TRIM ([WHERE] ['text' FROM] 'text') - /// TRIM ('text') - /// TRIM(<expr> [, characters]) -- only Snowflake or BigQuery - /// ``` - pub fn parse_trim_expr(&mut self) -> Result<Expr, ParserError> { - self.expect_token(&Token::LParen)?; - let mut trim_where = None; - if let Token::Word(word) = self.peek_token().token { - if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING] - .iter() - .any(|d| word.keyword == *d) - { - trim_where = Some(self.parse_trim_where()?); - } - } - let expr = self.parse_expr()?; - if self.parse_keyword(Keyword::FROM) { - let trim_what = Box::new(expr); - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Trim { - expr: Box::new(expr), - trim_where, - trim_what: Some(trim_what), - trim_characters: None, - }) - } else if self.consume_token(&Token::Comma) - && dialect_of!(self is SnowflakeDialect | BigQueryDialect | GenericDialect) - { - let characters = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - Ok(Expr::Trim { - expr:
Box::new(expr), - trim_where: None, - trim_what: None, - trim_characters: Some(characters), - }) - } else { - self.expect_token(&Token::RParen)?; - Ok(Expr::Trim { - expr: Box::new(expr), - trim_where, - trim_what: None, - trim_characters: None, - }) - } - } - - pub fn parse_trim_where(&mut self) -> Result { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::BOTH => Ok(TrimWhereField::Both), - Keyword::LEADING => Ok(TrimWhereField::Leading), - Keyword::TRAILING => Ok(TrimWhereField::Trailing), - _ => self.expected("trim_where field", next_token)?, - }, - _ => self.expected("trim_where field", next_token), - } - } - - /// Parses an array expression `[ex1, ex2, ..]` - /// if `named` is `true`, came from an expression like `ARRAY[ex1, ex2]` - pub fn parse_array_expr(&mut self, named: bool) -> Result { - let exprs = self.parse_comma_separated0(Parser::parse_expr, Token::RBracket)?; - self.expect_token(&Token::RBracket)?; - Ok(Expr::Array(Array { elem: exprs, named })) - } - - pub fn parse_listagg_on_overflow(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::ON, Keyword::OVERFLOW]) { - if self.parse_keyword(Keyword::ERROR) { - Ok(Some(ListAggOnOverflow::Error)) - } else { - self.expect_keyword(Keyword::TRUNCATE)?; - let filler = match self.peek_token().token { - Token::Word(w) - if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT => - { - None - } - Token::SingleQuotedString(_) - | Token::EscapedStringLiteral(_) - | Token::UnicodeStringLiteral(_) - | Token::NationalStringLiteral(_) - | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), - _ => self.expected( - "either filler, WITH, or WITHOUT in LISTAGG", - self.peek_token(), - )?, - }; - let with_count = self.parse_keyword(Keyword::WITH); - if !with_count && !self.parse_keyword(Keyword::WITHOUT) { - self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?; - } - self.expect_keyword(Keyword::COUNT)?; - Ok(Some(ListAggOnOverflow::Truncate { filler, with_count })) - } - } else { - Ok(None) - } - } - - // This function parses date/time fields for the EXTRACT function-like - // operator, interval qualifiers, and the ceil/floor operations. - // EXTRACT supports a wider set of date/time fields than interval qualifiers, - // so this function may need to be split in two. 
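As a quick illustration of the comment above, both EXTRACT and the CEIL/FLOOR `TO` form funnel into `parse_date_time_field`. A minimal sketch against the public API (the table and column names are illustrative):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    // EXTRACT with a FROM-style field...
    Parser::parse_sql(&dialect, "SELECT EXTRACT(YEAR FROM d) FROM t").unwrap();
    // ...and CEIL with a `TO DateTimeField` accept the same field names.
    Parser::parse_sql(&dialect, "SELECT CEIL(d TO DAY) FROM t").unwrap();
}
```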
- pub fn parse_date_time_field(&mut self) -> Result { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::YEAR => Ok(DateTimeField::Year), - Keyword::MONTH => Ok(DateTimeField::Month), - Keyword::WEEK => { - let week_day = if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.consume_token(&Token::LParen) - { - let week_day = self.parse_identifier(false)?; - self.expect_token(&Token::RParen)?; - Some(week_day) - } else { - None - }; - Ok(DateTimeField::Week(week_day)) - } - Keyword::DAY => Ok(DateTimeField::Day), - Keyword::DAYOFWEEK => Ok(DateTimeField::DayOfWeek), - Keyword::DAYOFYEAR => Ok(DateTimeField::DayOfYear), - Keyword::DATE => Ok(DateTimeField::Date), - Keyword::DATETIME => Ok(DateTimeField::Datetime), - Keyword::HOUR => Ok(DateTimeField::Hour), - Keyword::MINUTE => Ok(DateTimeField::Minute), - Keyword::SECOND => Ok(DateTimeField::Second), - Keyword::CENTURY => Ok(DateTimeField::Century), - Keyword::DECADE => Ok(DateTimeField::Decade), - Keyword::DOY => Ok(DateTimeField::Doy), - Keyword::DOW => Ok(DateTimeField::Dow), - Keyword::EPOCH => Ok(DateTimeField::Epoch), - Keyword::ISODOW => Ok(DateTimeField::Isodow), - Keyword::ISOYEAR => Ok(DateTimeField::Isoyear), - Keyword::ISOWEEK => Ok(DateTimeField::IsoWeek), - Keyword::JULIAN => Ok(DateTimeField::Julian), - Keyword::MICROSECOND => Ok(DateTimeField::Microsecond), - Keyword::MICROSECONDS => Ok(DateTimeField::Microseconds), - Keyword::MILLENIUM => Ok(DateTimeField::Millenium), - Keyword::MILLENNIUM => Ok(DateTimeField::Millennium), - Keyword::MILLISECOND => Ok(DateTimeField::Millisecond), - Keyword::MILLISECONDS => Ok(DateTimeField::Milliseconds), - Keyword::NANOSECOND => Ok(DateTimeField::Nanosecond), - Keyword::NANOSECONDS => Ok(DateTimeField::Nanoseconds), - Keyword::QUARTER => Ok(DateTimeField::Quarter), - Keyword::TIME => Ok(DateTimeField::Time), - Keyword::TIMEZONE => Ok(DateTimeField::Timezone), - Keyword::TIMEZONE_ABBR => Ok(DateTimeField::TimezoneAbbr), - Keyword::TIMEZONE_HOUR => Ok(DateTimeField::TimezoneHour), - Keyword::TIMEZONE_MINUTE => Ok(DateTimeField::TimezoneMinute), - Keyword::TIMEZONE_REGION => Ok(DateTimeField::TimezoneRegion), - _ if self.dialect.allow_extract_custom() => { - self.prev_token(); - let custom = self.parse_identifier(false)?; - Ok(DateTimeField::Custom(custom)) - } - _ => self.expected("date/time field", next_token), - }, - Token::SingleQuotedString(_) if self.dialect.allow_extract_single_quotes() => { - self.prev_token(); - let custom = self.parse_identifier(false)?; - Ok(DateTimeField::Custom(custom)) - } - _ => self.expected("date/time field", next_token), - } - } - - pub fn parse_not(&mut self) -> Result { - match self.peek_token().token { - Token::Word(w) => match w.keyword { - Keyword::EXISTS => { - let negated = true; - let _ = self.parse_keyword(Keyword::EXISTS); - self.parse_exists_expr(negated) - } - _ => Ok(Expr::UnaryOp { - op: UnaryOperator::Not, - expr: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?, - ), - }), - }, - _ => Ok(Expr::UnaryOp { - op: UnaryOperator::Not, - expr: Box::new(self.parse_subexpr(self.dialect.prec_value(Precedence::UnaryNot))?), - }), - } - } - - /// Parses fulltext expressions [`sqlparser::ast::Expr::MatchAgainst`] - /// - /// # Errors - /// This method will raise an error if the column list is empty or with invalid identifiers, - /// the match expression is not a literal string, or if the search modifier is not valid. 
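Since MATCH ... AGAINST is MySQL syntax, the quickest way to exercise `parse_match_against` from the outside is through `MySqlDialect`; a minimal sketch (the table and column names are illustrative):

```rust
use sqlparser::dialect::MySqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // Parses into Expr::MatchAgainst with the IN NATURAL LANGUAGE MODE modifier.
    let sql = "SELECT * FROM articles \
               WHERE MATCH (title, body) AGAINST ('database' IN NATURAL LANGUAGE MODE)";
    Parser::parse_sql(&MySqlDialect {}, sql).unwrap();
}
```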
- pub fn parse_match_against(&mut self) -> Result<Expr, ParserError> { - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - - self.expect_keyword(Keyword::AGAINST)?; - - self.expect_token(&Token::LParen)?; - - // MySQL is too permissive about the value; we can't validate it perfectly at the syntax level. - let match_value = self.parse_value()?; - - let in_natural_language_mode_keywords = &[ - Keyword::IN, - Keyword::NATURAL, - Keyword::LANGUAGE, - Keyword::MODE, - ]; - - let with_query_expansion_keywords = &[Keyword::WITH, Keyword::QUERY, Keyword::EXPANSION]; - - let in_boolean_mode_keywords = &[Keyword::IN, Keyword::BOOLEAN, Keyword::MODE]; - - let opt_search_modifier = if self.parse_keywords(in_natural_language_mode_keywords) { - if self.parse_keywords(with_query_expansion_keywords) { - Some(SearchModifier::InNaturalLanguageModeWithQueryExpansion) - } else { - Some(SearchModifier::InNaturalLanguageMode) - } - } else if self.parse_keywords(in_boolean_mode_keywords) { - Some(SearchModifier::InBooleanMode) - } else if self.parse_keywords(with_query_expansion_keywords) { - Some(SearchModifier::WithQueryExpansion) - } else { - None - }; - - self.expect_token(&Token::RParen)?; - - Ok(Expr::MatchAgainst { - columns, - match_value, - opt_search_modifier, - }) - } - - /// Parse an `INTERVAL` expression. - /// - /// Some syntactically valid intervals: - /// - /// ```sql - /// 1. INTERVAL '1' DAY - /// 2. INTERVAL '1-1' YEAR TO MONTH - /// 3. INTERVAL '1' SECOND - /// 4. INTERVAL '1:1:1.1' HOUR (5) TO SECOND (5) - /// 5. INTERVAL '1.1' SECOND (2, 2) - /// 6. INTERVAL '1:1' HOUR (5) TO MINUTE (5) - /// 7. (MySQL & BigQuery only): INTERVAL 1 DAY - /// ``` - /// - /// Note that we do not currently attempt to parse the quoted value. - pub fn parse_interval(&mut self) -> Result<Expr, ParserError> { - // The SQL standard allows an optional sign before the value string, but - // it is not clear if any implementations support that syntax, so we - // don't currently try to parse it. (The sign can instead be included - // inside the value string.) - - // To match the different flavours of INTERVAL syntax, we only allow expressions - // if the dialect requires an interval qualifier; - // see https://github.com/sqlparser-rs/sqlparser-rs/pull/1398 for more details - let value = if self.dialect.require_interval_qualifier() { - // parse a whole expression so `INTERVAL 1 + 1 DAY` is valid - self.parse_expr()? - } else { - // parse a prefix expression so `INTERVAL 1 DAY` is valid, but `INTERVAL 1 + 1 DAY` is not - // this also means that `INTERVAL '5 days' > INTERVAL '1 day'` is treated properly - self.parse_prefix()? - }; - - // Following the string literal is a qualifier which indicates the units - // of the duration specified in the string literal. - // - // Note that PostgreSQL allows omitting the qualifier, so we provide - // this more general implementation. - let leading_field = if self.next_token_is_temporal_unit() { - Some(self.parse_date_time_field()?) - } else if self.dialect.require_interval_qualifier() { - return parser_err!( - "INTERVAL requires a unit after the literal value", - self.peek_token().span.start - ); - } else { - None - }; - - let (leading_precision, last_field, fsec_precision) = - if leading_field == Some(DateTimeField::Second) { - // SQL mandates special syntax for `SECOND TO SECOND` literals.
- // Instead of - // `SECOND [(<leading precision>)] TO SECOND[(<fractional seconds precision>)]` - // one must use the special format: - // `SECOND [( <leading precision> [ , <fractional seconds precision>] )]` - let last_field = None; - let (leading_precision, fsec_precision) = self.parse_optional_precision_scale()?; - (leading_precision, last_field, fsec_precision) - } else { - let leading_precision = self.parse_optional_precision()?; - if self.parse_keyword(Keyword::TO) { - let last_field = Some(self.parse_date_time_field()?); - let fsec_precision = if last_field == Some(DateTimeField::Second) { - self.parse_optional_precision()? - } else { - None - }; - (leading_precision, last_field, fsec_precision) - } else { - (leading_precision, None, None) - } - }; - - Ok(Expr::Interval(Interval { - value: Box::new(value), - leading_field, - leading_precision, - last_field, - fractional_seconds_precision: fsec_precision, - })) - } - - /// Peek at the next token and determine if it is a temporal unit - /// like `second`. - pub fn next_token_is_temporal_unit(&mut self) -> bool { - if let Token::Word(word) = self.peek_token().token { - matches!( - word.keyword, - Keyword::YEAR - | Keyword::MONTH - | Keyword::WEEK - | Keyword::DAY - | Keyword::HOUR - | Keyword::MINUTE - | Keyword::SECOND - | Keyword::CENTURY - | Keyword::DECADE - | Keyword::DOW - | Keyword::DOY - | Keyword::EPOCH - | Keyword::ISODOW - | Keyword::ISOYEAR - | Keyword::JULIAN - | Keyword::MICROSECOND - | Keyword::MICROSECONDS - | Keyword::MILLENIUM - | Keyword::MILLENNIUM - | Keyword::MILLISECOND - | Keyword::MILLISECONDS - | Keyword::NANOSECOND - | Keyword::NANOSECONDS - | Keyword::QUARTER - | Keyword::TIMEZONE - | Keyword::TIMEZONE_HOUR - | Keyword::TIMEZONE_MINUTE - ) - } else { - false - } - } - - /// Syntax - /// ```sql - /// -- typed - /// STRUCT<[field_name] field_type, ...>( expr1 [, ... ]) - /// -- typeless - /// STRUCT( expr1 [AS field_name] [, ... ]) - /// ``` - fn parse_struct_literal(&mut self) -> Result<Expr, ParserError> { - // Parse the field definitions if they exist: `<[field_name] field_type, ...>` - self.prev_token(); - let (fields, trailing_bracket) = - self.parse_struct_type_def(Self::parse_struct_field_def)?; - if trailing_bracket.0 { - return parser_err!( - "unmatched > in STRUCT literal", - self.peek_token().span.start - ); - } - - // Parse the struct values `(expr1 [, ... ])` - self.expect_token(&Token::LParen)?; - let values = self - .parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?; - self.expect_token(&Token::RParen)?; - - Ok(Expr::Struct { values, fields }) - } - - /// Parse an expression value for a struct literal - /// Syntax - /// ```sql - /// expr [AS name] - /// ``` - /// - /// For BigQuery [1], the parameter `typed_syntax` is set to true if the expression - /// is to be parsed as a field expression declared using typed - /// struct syntax [2], and false if using typeless struct syntax [3].
- /// - /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#constructing_a_struct - /// [2]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typed_struct_syntax - /// [3]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#typeless_struct_syntax - fn parse_struct_field_expr(&mut self, typed_syntax: bool) -> Result<Expr, ParserError> { - let expr = self.parse_expr()?; - if self.parse_keyword(Keyword::AS) { - if typed_syntax { - return parser_err!("Typed syntax does not allow AS", { - self.prev_token(); - self.peek_token().span.start - }); - } - let field_name = self.parse_identifier(false)?; - Ok(Expr::Named { - expr: expr.into(), - name: field_name, - }) - } else { - Ok(expr) - } - } - - /// Parse a struct type definition as a sequence of field-value pairs. - /// The syntax of the struct elements differs by dialect, so it is customised - /// by the `elem_parser` argument. - /// - /// Syntax - /// ```sql - /// Hive: - /// STRUCT<field_name: field_type> - /// - /// BigQuery: - /// STRUCT<[field_name] field_type> - /// ``` - fn parse_struct_type_def<F>( - &mut self, - mut elem_parser: F, - ) -> Result<(Vec<StructField>, MatchedTrailingBracket), ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result<(StructField, MatchedTrailingBracket), ParserError>, - { - let start_token = self.peek_token(); - self.expect_keyword(Keyword::STRUCT)?; - - // Nothing to do if we have no type information. - if Token::Lt != self.peek_token() { - return Ok((Default::default(), false.into())); - } - self.next_token(); - - let mut field_defs = vec![]; - let trailing_bracket = loop { - let (def, trailing_bracket) = elem_parser(self)?; - field_defs.push(def); - if !self.consume_token(&Token::Comma) { - break trailing_bracket; - } - - // Angle brackets are balanced, so we only expect the trailing `>>` after - // we've matched all field types for the current struct. - // e.g. this is invalid syntax: `STRUCT<STRUCT<INT>>, INT>(NULL)` - if trailing_bracket.0 { - return parser_err!("unmatched > in STRUCT definition", start_token.span.start); - } - }; - - Ok(( - field_defs, - self.expect_closing_angle_bracket(trailing_bracket)?, - )) - } - - /// Parse a DuckDB struct data type. - fn parse_duckdb_struct_type_def(&mut self) -> Result<Vec<StructField>, ParserError> { - self.expect_keyword(Keyword::STRUCT)?; - self.expect_token(&Token::LParen)?; - let struct_body = self.parse_comma_separated(|parser| { - let field_name = parser.parse_identifier(false)?; - let field_type = parser.parse_data_type()?; - - Ok(StructField { - field_name: Some(field_name), - field_type, - }) - }); - self.expect_token(&Token::RParen)?; - struct_body - } - - /// Parse a field definition in a [struct] or [tuple]. - /// Syntax: - /// - /// ```sql - /// [field_name] field_type - /// ``` - /// - /// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type - /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple - fn parse_struct_field_def( - &mut self, - ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { - // Look beyond the next item to infer whether both field name - // and type are specified. - let is_anonymous_field = !matches!( - (self.peek_nth_token(0).token, self.peek_nth_token(1).token), - (Token::Word(_), Token::Word(_)) - ); - - let field_name = if is_anonymous_field { - None - } else { - Some(self.parse_identifier(false)?)
- }; - - let (field_type, trailing_bracket) = self.parse_data_type_helper()?; - - Ok(( - StructField { - field_name, - field_type, - }, - trailing_bracket, - )) - } - - /// DuckDB specific: Parse a Union type definition as a sequence of field-value pairs. - /// - /// Syntax: - /// - /// ```sql - /// UNION(field_name field_type[,...]) - /// ``` - /// - /// [1]: https://duckdb.org/docs/sql/data_types/union.html - fn parse_union_type_def(&mut self) -> Result, ParserError> { - self.expect_keyword(Keyword::UNION)?; - - self.expect_token(&Token::LParen)?; - - let fields = self.parse_comma_separated(|p| { - Ok(UnionField { - field_name: p.parse_identifier(false)?, - field_type: p.parse_data_type()?, - }) - })?; - - self.expect_token(&Token::RParen)?; - - Ok(fields) - } - - /// DuckDB specific: Parse a duckdb [dictionary] - /// - /// Syntax: - /// - /// ```sql - /// {'field_name': expr1[, ... ]} - /// ``` - /// - /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs - fn parse_duckdb_struct_literal(&mut self) -> Result { - self.expect_token(&Token::LBrace)?; - - let fields = self.parse_comma_separated(Self::parse_duckdb_dictionary_field)?; - - self.expect_token(&Token::RBrace)?; - - Ok(Expr::Dictionary(fields)) - } - - /// Parse a field for a duckdb [dictionary] - /// - /// Syntax - /// - /// ```sql - /// 'name': expr - /// ``` - /// - /// [dictionary]: https://duckdb.org/docs/sql/data_types/struct#creating-structs - fn parse_duckdb_dictionary_field(&mut self) -> Result { - let key = self.parse_identifier(false)?; - - self.expect_token(&Token::Colon)?; - - let expr = self.parse_expr()?; - - Ok(DictionaryField { - key, - value: Box::new(expr), - }) - } - - /// DuckDB specific: Parse a duckdb [map] - /// - /// Syntax: - /// - /// ```sql - /// Map {key1: value1[, ... ]} - /// ``` - /// - /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps - fn parse_duckdb_map_literal(&mut self) -> Result { - self.expect_token(&Token::LBrace)?; - let fields = self.parse_comma_separated0(Self::parse_duckdb_map_field, Token::RBrace)?; - self.expect_token(&Token::RBrace)?; - Ok(Expr::Map(Map { entries: fields })) - } - - /// Parse a field for a duckdb [map] - /// - /// Syntax - /// - /// ```sql - /// key: value - /// ``` - /// - /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps - fn parse_duckdb_map_field(&mut self) -> Result { - let key = self.parse_expr()?; - - self.expect_token(&Token::Colon)?; - - let value = self.parse_expr()?; - - Ok(MapEntry { - key: Box::new(key), - value: Box::new(value), - }) - } - - /// Parse clickhouse [map] - /// - /// Syntax - /// - /// ```sql - /// Map(key_data_type, value_data_type) - /// ``` - /// - /// [map]: https://clickhouse.com/docs/en/sql-reference/data-types/map - fn parse_click_house_map_def(&mut self) -> Result<(DataType, DataType), ParserError> { - self.expect_keyword(Keyword::MAP)?; - self.expect_token(&Token::LParen)?; - let key_data_type = self.parse_data_type()?; - self.expect_token(&Token::Comma)?; - let value_data_type = self.parse_data_type()?; - self.expect_token(&Token::RParen)?; - - Ok((key_data_type, value_data_type)) - } - - /// Parse clickhouse [tuple] - /// - /// Syntax - /// - /// ```sql - /// Tuple([field_name] field_type, ...) 
- /// ``` - /// - /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple - fn parse_click_house_tuple_def(&mut self) -> Result<Vec<StructField>, ParserError> { - self.expect_keyword(Keyword::TUPLE)?; - self.expect_token(&Token::LParen)?; - let mut field_defs = vec![]; - loop { - let (def, _) = self.parse_struct_field_def()?; - field_defs.push(def); - if !self.consume_token(&Token::Comma) { - break; - } - } - self.expect_token(&Token::RParen)?; - - Ok(field_defs) - } - - /// For nested types that use the angle bracket syntax, this matches either - /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously - /// matched `trailing_bracket` argument). It returns whether there is a trailing `>` - /// left to be matched - (i.e. if '>>' was matched). - fn expect_closing_angle_bracket( - &mut self, - trailing_bracket: MatchedTrailingBracket, - ) -> Result<MatchedTrailingBracket, ParserError> { - let trailing_bracket = if !trailing_bracket.0 { - match self.peek_token().token { - Token::Gt => { - self.next_token(); - false.into() - } - Token::ShiftRight => { - self.next_token(); - true.into() - } - _ => return self.expected(">", self.peek_token()), - } - } else { - false.into() - }; - - Ok(trailing_bracket) - } - - /// Parse an operator following an expression - pub fn parse_infix(&mut self, expr: Expr, precedence: u8) -> Result<Expr, ParserError> { - // allow the dialect to override infix parsing - if let Some(infix) = self.dialect.parse_infix(self, &expr, precedence) { - return infix; - } - - let mut tok = self.next_token(); - let regular_binary_operator = match &mut tok.token { - Token::Spaceship => Some(BinaryOperator::Spaceship), - Token::DoubleEq => Some(BinaryOperator::Eq), - Token::Eq => Some(BinaryOperator::Eq), - Token::Neq => Some(BinaryOperator::NotEq), - Token::Gt => Some(BinaryOperator::Gt), - Token::GtEq => Some(BinaryOperator::GtEq), - Token::Lt => Some(BinaryOperator::Lt), - Token::LtEq => Some(BinaryOperator::LtEq), - Token::Plus => Some(BinaryOperator::Plus), - Token::Minus => Some(BinaryOperator::Minus), - Token::Mul => Some(BinaryOperator::Multiply), - Token::Mod => Some(BinaryOperator::Modulo), - Token::StringConcat => Some(BinaryOperator::StringConcat), - Token::Pipe => Some(BinaryOperator::BitwiseOr), - Token::Caret => { - // In PostgreSQL, ^ stands for the exponentiation operation, - // and # stands for XOR.
See https://www.postgresql.org/docs/current/functions-math.html - if dialect_of!(self is PostgreSqlDialect) { - Some(BinaryOperator::PGExp) - } else { - Some(BinaryOperator::BitwiseXor) - } - } - Token::Ampersand => Some(BinaryOperator::BitwiseAnd), - Token::Div => Some(BinaryOperator::Divide), - Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => { - Some(BinaryOperator::DuckIntegerDivide) - } - Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { - Some(BinaryOperator::PGBitwiseShiftLeft) - } - Token::ShiftRight if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { - Some(BinaryOperator::PGBitwiseShiftRight) - } - Token::Sharp if dialect_of!(self is PostgreSqlDialect) => { - Some(BinaryOperator::PGBitwiseXor) - } - Token::Overlap if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { - Some(BinaryOperator::PGOverlap) - } - Token::CaretAt if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { - Some(BinaryOperator::PGStartsWith) - } - Token::Tilde => Some(BinaryOperator::PGRegexMatch), - Token::TildeAsterisk => Some(BinaryOperator::PGRegexIMatch), - Token::ExclamationMarkTilde => Some(BinaryOperator::PGRegexNotMatch), - Token::ExclamationMarkTildeAsterisk => Some(BinaryOperator::PGRegexNotIMatch), - Token::DoubleTilde => Some(BinaryOperator::PGLikeMatch), - Token::DoubleTildeAsterisk => Some(BinaryOperator::PGILikeMatch), - Token::ExclamationMarkDoubleTilde => Some(BinaryOperator::PGNotLikeMatch), - Token::ExclamationMarkDoubleTildeAsterisk => Some(BinaryOperator::PGNotILikeMatch), - Token::Arrow => Some(BinaryOperator::Arrow), - Token::LongArrow => Some(BinaryOperator::LongArrow), - Token::HashArrow => Some(BinaryOperator::HashArrow), - Token::HashLongArrow => Some(BinaryOperator::HashLongArrow), - Token::AtArrow => Some(BinaryOperator::AtArrow), - Token::ArrowAt => Some(BinaryOperator::ArrowAt), - Token::HashMinus => Some(BinaryOperator::HashMinus), - Token::AtQuestion => Some(BinaryOperator::AtQuestion), - Token::AtAt => Some(BinaryOperator::AtAt), - Token::Question => Some(BinaryOperator::Question), - Token::QuestionAnd => Some(BinaryOperator::QuestionAnd), - Token::QuestionPipe => Some(BinaryOperator::QuestionPipe), - Token::CustomBinaryOperator(s) => Some(BinaryOperator::Custom(core::mem::take(s))), - - Token::Word(w) => match w.keyword { - Keyword::AND => Some(BinaryOperator::And), - Keyword::OR => Some(BinaryOperator::Or), - Keyword::XOR => Some(BinaryOperator::Xor), - Keyword::OPERATOR if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { - self.expect_token(&Token::LParen)?; - // there are special rules for operator names in - // postgres so we can not use 'parse_object' - // or similar. - // See https://www.postgresql.org/docs/current/sql-createoperator.html - let mut idents = vec![]; - loop { - idents.push(self.next_token().to_string()); - if !self.consume_token(&Token::Period) { - break; - } - } - self.expect_token(&Token::RParen)?; - Some(BinaryOperator::PGCustomBinaryOperator(idents)) - } - _ => None, - }, - _ => None, - }; - - if let Some(op) = regular_binary_operator { - if let Some(keyword) = - self.parse_one_of_keywords(&[Keyword::ANY, Keyword::ALL, Keyword::SOME]) - { - self.expect_token(&Token::LParen)?; - let right = if self.peek_sub_query() { - // We have a subquery ahead (SELECT\WITH ...) need to rewind and - // use the parenthesis for parsing the subquery as an expression. - self.prev_token(); // LParen - self.parse_subexpr(precedence)? 
- } else { - // Non-subquery expression - let right = self.parse_subexpr(precedence)?; - self.expect_token(&Token::RParen)?; - right - }; - - if !matches!( - op, - BinaryOperator::Gt - | BinaryOperator::Lt - | BinaryOperator::GtEq - | BinaryOperator::LtEq - | BinaryOperator::Eq - | BinaryOperator::NotEq - ) { - return parser_err!( - format!( - "Expected one of [=, >, <, =>, =<, !=] as comparison operator, found: {op}" - ), - tok.span.start - ); - }; - - Ok(match keyword { - Keyword::ALL => Expr::AllOp { - left: Box::new(expr), - compare_op: op, - right: Box::new(right), - }, - Keyword::ANY | Keyword::SOME => Expr::AnyOp { - left: Box::new(expr), - compare_op: op, - right: Box::new(right), - is_some: keyword == Keyword::SOME, - }, - _ => unreachable!(), - }) - } else { - Ok(Expr::BinaryOp { - left: Box::new(expr), - op, - right: Box::new(self.parse_subexpr(precedence)?), - }) - } - } else if let Token::Word(w) = &tok.token { - match w.keyword { - Keyword::IS => { - if self.parse_keyword(Keyword::NULL) { - Ok(Expr::IsNull(Box::new(expr))) - } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { - Ok(Expr::IsNotNull(Box::new(expr))) - } else if self.parse_keywords(&[Keyword::TRUE]) { - Ok(Expr::IsTrue(Box::new(expr))) - } else if self.parse_keywords(&[Keyword::NOT, Keyword::TRUE]) { - Ok(Expr::IsNotTrue(Box::new(expr))) - } else if self.parse_keywords(&[Keyword::FALSE]) { - Ok(Expr::IsFalse(Box::new(expr))) - } else if self.parse_keywords(&[Keyword::NOT, Keyword::FALSE]) { - Ok(Expr::IsNotFalse(Box::new(expr))) - } else if self.parse_keywords(&[Keyword::UNKNOWN]) { - Ok(Expr::IsUnknown(Box::new(expr))) - } else if self.parse_keywords(&[Keyword::NOT, Keyword::UNKNOWN]) { - Ok(Expr::IsNotUnknown(Box::new(expr))) - } else if self.parse_keywords(&[Keyword::DISTINCT, Keyword::FROM]) { - let expr2 = self.parse_expr()?; - Ok(Expr::IsDistinctFrom(Box::new(expr), Box::new(expr2))) - } else if self.parse_keywords(&[Keyword::NOT, Keyword::DISTINCT, Keyword::FROM]) - { - let expr2 = self.parse_expr()?; - Ok(Expr::IsNotDistinctFrom(Box::new(expr), Box::new(expr2))) - } else { - self.expected( - "[NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS", - self.peek_token(), - ) - } - } - Keyword::AT => { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - Ok(Expr::AtTimeZone { - timestamp: Box::new(expr), - time_zone: Box::new(self.parse_subexpr(precedence)?), - }) - } - Keyword::NOT - | Keyword::IN - | Keyword::BETWEEN - | Keyword::LIKE - | Keyword::ILIKE - | Keyword::SIMILAR - | Keyword::REGEXP - | Keyword::RLIKE => { - self.prev_token(); - let negated = self.parse_keyword(Keyword::NOT); - let regexp = self.parse_keyword(Keyword::REGEXP); - let rlike = self.parse_keyword(Keyword::RLIKE); - if regexp || rlike { - Ok(Expr::RLike { - negated, - expr: Box::new(expr), - pattern: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, - ), - regexp, - }) - } else if self.parse_keyword(Keyword::IN) { - self.parse_in(expr, negated) - } else if self.parse_keyword(Keyword::BETWEEN) { - self.parse_between(expr, negated) - } else if self.parse_keyword(Keyword::LIKE) { - Ok(Expr::Like { - negated, - any: self.parse_keyword(Keyword::ANY), - expr: Box::new(expr), - pattern: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, - ), - escape_char: self.parse_escape_char()?, - }) - } else if self.parse_keyword(Keyword::ILIKE) { - Ok(Expr::ILike { - negated, - any: self.parse_keyword(Keyword::ANY), - expr: Box::new(expr), - pattern: Box::new( - 
self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, - ), - escape_char: self.parse_escape_char()?, - }) - } else if self.parse_keywords(&[Keyword::SIMILAR, Keyword::TO]) { - Ok(Expr::SimilarTo { - negated, - expr: Box::new(expr), - pattern: Box::new( - self.parse_subexpr(self.dialect.prec_value(Precedence::Like))?, - ), - escape_char: self.parse_escape_char()?, - }) - } else { - self.expected("IN or BETWEEN after NOT", self.peek_token()) - } - } - // Can only happen if `get_next_precedence` got out of sync with this function - _ => parser_err!( - format!("No infix parser for token {:?}", tok.token), - tok.span.start - ), - } - } else if Token::DoubleColon == tok { - Ok(Expr::Cast { - kind: CastKind::DoubleColon, - expr: Box::new(expr), - data_type: self.parse_data_type()?, - format: None, - }) - } else if Token::ExclamationMark == tok && self.dialect.supports_factorial_operator() { - Ok(Expr::UnaryOp { - op: UnaryOperator::PGPostfixFactorial, - expr: Box::new(expr), - }) - } else if Token::LBracket == tok { - if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) { - self.parse_subscript(expr) - } else if dialect_of!(self is SnowflakeDialect) || self.dialect.supports_partiql() { - self.prev_token(); - self.parse_json_access(expr) - } else { - self.parse_map_access(expr) - } - } else if dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == tok { - self.prev_token(); - self.parse_json_access(expr) - } else { - // Can only happen if `get_next_precedence` got out of sync with this function - parser_err!( - format!("No infix parser for token {:?}", tok.token), - tok.span.start - ) - } - } - - /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` - pub fn parse_escape_char(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::ESCAPE) { - Ok(Some(self.parse_literal_string()?)) - } else { - Ok(None) - } - } - - /// Parses an array subscript like - /// * `[:]` - /// * `[l]` - /// * `[l:]` - /// * `[:u]` - /// * `[l:u]` - /// * `[l:u:s]` - /// - /// Parser is right after `[` - fn parse_subscript_inner(&mut self) -> Result { - // at either `:(rest)` or `:(rest)]` - let lower_bound = if self.consume_token(&Token::Colon) { - None - } else { - Some(self.parse_expr()?) - }; - - // check for end - if self.consume_token(&Token::RBracket) { - if let Some(lower_bound) = lower_bound { - return Ok(Subscript::Index { index: lower_bound }); - }; - return Ok(Subscript::Slice { - lower_bound, - upper_bound: None, - stride: None, - }); - } - - // consume the `:` - if lower_bound.is_some() { - self.expect_token(&Token::Colon)?; - } - - // we are now at either `]`, `(rest)]` - let upper_bound = if self.consume_token(&Token::RBracket) { - return Ok(Subscript::Slice { - lower_bound, - upper_bound: None, - stride: None, - }); - } else { - Some(self.parse_expr()?) - }; - - // check for end - if self.consume_token(&Token::RBracket) { - return Ok(Subscript::Slice { - lower_bound, - upper_bound, - stride: None, - }); - } - - // we are now at `:]` or `:stride]` - self.expect_token(&Token::Colon)?; - let stride = if self.consume_token(&Token::RBracket) { - None - } else { - Some(self.parse_expr()?) 
- }; - - if stride.is_some() { - self.expect_token(&Token::RBracket)?; - } - - Ok(Subscript::Slice { - lower_bound, - upper_bound, - stride, - }) - } - - /// Parses an array subscript like `[1:3]` - /// - /// Parser is right after `[` - pub fn parse_subscript(&mut self, expr: Expr) -> Result { - let subscript = self.parse_subscript_inner()?; - Ok(Expr::Subscript { - expr: Box::new(expr), - subscript: Box::new(subscript), - }) - } - - fn parse_json_path_object_key(&mut self) -> Result { - let token = self.next_token(); - match token.token { - Token::Word(Word { - value, - // path segments in SF dot notation can be unquoted or double-quoted - quote_style: quote_style @ (Some('"') | None), - // some experimentation suggests that snowflake permits - // any keyword here unquoted. - keyword: _, - }) => Ok(JsonPathElem::Dot { - key: value, - quoted: quote_style.is_some(), - }), - - // This token should never be generated on snowflake or generic - // dialects, but we handle it just in case this is used on future - // dialects. - Token::DoubleQuotedString(key) => Ok(JsonPathElem::Dot { key, quoted: true }), - - _ => self.expected("variant object key name", token), - } - } - - fn parse_json_access(&mut self, expr: Expr) -> Result { - let path = self.parse_json_path()?; - Ok(Expr::JsonAccess { - value: Box::new(expr), - path, - }) - } - - fn parse_json_path(&mut self) -> Result { - let mut path = Vec::new(); - loop { - match self.next_token().token { - Token::Colon if path.is_empty() => { - path.push(self.parse_json_path_object_key()?); - } - Token::Period if !path.is_empty() => { - path.push(self.parse_json_path_object_key()?); - } - Token::LBracket => { - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - - path.push(JsonPathElem::Bracket { key }); - } - _ => { - self.prev_token(); - break; - } - }; - } - - debug_assert!(!path.is_empty()); - Ok(JsonPath { path }) - } - - pub fn parse_map_access(&mut self, expr: Expr) -> Result { - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - - let mut keys = vec![MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - }]; - loop { - let key = match self.peek_token().token { - Token::LBracket => { - self.next_token(); // consume `[` - let key = self.parse_expr()?; - self.expect_token(&Token::RBracket)?; - MapAccessKey { - key, - syntax: MapAccessSyntax::Bracket, - } - } - // Access on BigQuery nested and repeated expressions can - // mix notations in the same expression. - // https://cloud.google.com/bigquery/docs/nested-repeated#query_nested_and_repeated_columns - Token::Period if dialect_of!(self is BigQueryDialect) => { - self.next_token(); // consume `.` - MapAccessKey { - key: self.parse_expr()?, - syntax: MapAccessSyntax::Period, - } - } - _ => break, - }; - keys.push(key); - } - - Ok(Expr::MapAccess { - column: Box::new(expr), - keys, - }) - } - - /// Parses the parens following the `[ NOT ] IN` operator. 
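`parse_in` (below) and `parse_between` (a little further down) cover the three IN shapes plus the BETWEEN bounds. A minimal sketch of the inputs they accept (the table, column, and array names are illustrative; the UNNEST form assumes `BigQueryDialect`):

```rust
use sqlparser::dialect::{BigQueryDialect, GenericDialect};
use sqlparser::parser::Parser;

fn main() {
    let generic = GenericDialect {};
    // Expr::InList, Expr::InSubquery, and Expr::Between respectively.
    Parser::parse_sql(&generic, "SELECT * FROM t WHERE x IN (1, 2, 3)").unwrap();
    Parser::parse_sql(&generic, "SELECT * FROM t WHERE x NOT IN (SELECT y FROM u)").unwrap();
    Parser::parse_sql(&generic, "SELECT * FROM t WHERE x BETWEEN 1 AND 10").unwrap();
    // BigQuery additionally allows `IN UNNEST(array_expression)`.
    Parser::parse_sql(&BigQueryDialect {}, "SELECT * FROM t WHERE x IN UNNEST(arr)").unwrap();
}
```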
- pub fn parse_in(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> { - // BigQuery allows `IN UNNEST(array_expression)` - // https://cloud.google.com/bigquery/docs/reference/standard-sql/operators#in_operators - if self.parse_keyword(Keyword::UNNEST) { - self.expect_token(&Token::LParen)?; - let array_expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - return Ok(Expr::InUnnest { - expr: Box::new(expr), - array_expr: Box::new(array_expr), - negated, - }); - } - self.expect_token(&Token::LParen)?; - let in_op = if self.parse_keyword(Keyword::SELECT) || self.parse_keyword(Keyword::WITH) { - self.prev_token(); - Expr::InSubquery { - expr: Box::new(expr), - subquery: self.parse_query()?, - negated, - } - } else { - Expr::InList { - expr: Box::new(expr), - list: if self.dialect.supports_in_empty_list() { - self.parse_comma_separated0(Parser::parse_expr, Token::RParen)? - } else { - self.parse_comma_separated(Parser::parse_expr)? - }, - negated, - } - }; - self.expect_token(&Token::RParen)?; - Ok(in_op) - } - - /// Parses `BETWEEN <low> AND <high>`, assuming the `BETWEEN` keyword was already consumed. - pub fn parse_between(&mut self, expr: Expr, negated: bool) -> Result<Expr, ParserError> { - // Stop parsing subexpressions for <low> and <high> on tokens with - // precedence lower than that of `BETWEEN`, such as `AND`, `IS`, etc. - let low = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?; - self.expect_keyword(Keyword::AND)?; - let high = self.parse_subexpr(self.dialect.prec_value(Precedence::Between))?; - Ok(Expr::Between { - expr: Box::new(expr), - negated, - low: Box::new(low), - high: Box::new(high), - }) - } - - /// Parse a PostgreSQL casting style which is in the form of `expr::datatype`. - pub fn parse_pg_cast(&mut self, expr: Expr) -> Result<Expr, ParserError> { - Ok(Expr::Cast { - kind: CastKind::DoubleColon, - expr: Box::new(expr), - data_type: self.parse_data_type()?, - format: None, - }) - } - - /// Get the precedence of the next token - pub fn get_next_precedence(&self) -> Result<u8, ParserError> { - self.dialect.get_next_precedence_default(self) - } - - /// Return the first non-whitespace token that has not yet been processed - /// (or None if reached end-of-file) - pub fn peek_token(&self) -> TokenWithSpan { - self.peek_nth_token(0) - } - - /// Returns the `N` next non-whitespace tokens that have not yet been - /// processed. - /// - /// Example: - /// ```rust - /// # use sqlparser::dialect::GenericDialect; - /// # use sqlparser::parser::Parser; - /// # use sqlparser::keywords::Keyword; - /// # use sqlparser::tokenizer::{Token, Word}; - /// let dialect = GenericDialect {}; - /// let mut parser = Parser::new(&dialect).try_with_sql("ORDER BY foo, bar").unwrap(); - /// - /// // Note that Rust infers the number of tokens to peek based on the - /// // length of the slice pattern! - /// assert!(matches!( - /// parser.peek_tokens(), - /// [ - /// Token::Word(Word { keyword: Keyword::ORDER, .. }), - /// Token::Word(Word { keyword: Keyword::BY, .. }), - /// ] - /// )); - /// ``` - pub fn peek_tokens<const N: usize>(&self) -> [Token; N] { - self.peek_tokens_with_location() - .map(|with_loc| with_loc.token) - } - - /// Returns the `N` next non-whitespace tokens with locations that have not - /// yet been processed. - /// - /// See [`Self::peek_tokens`] for an example.
- pub fn peek_tokens_with_location(&self) -> [TokenWithSpan; N] { - let mut index = self.index; - core::array::from_fn(|_| loop { - let token = self.tokens.get(index); - index += 1; - if let Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) = token - { - continue; - } - break token.cloned().unwrap_or(TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }); - }) - } - - /// Return nth non-whitespace token that has not yet been processed - pub fn peek_nth_token(&self, mut n: usize) -> TokenWithSpan { - let mut index = self.index; - loop { - index += 1; - match self.tokens.get(index - 1) { - Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) => continue, - non_whitespace => { - if n == 0 { - return non_whitespace.cloned().unwrap_or(TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }); - } - n -= 1; - } - } - } - } - - /// Return the first token, possibly whitespace, that has not yet been processed - /// (or None if reached end-of-file). - pub fn peek_token_no_skip(&self) -> TokenWithSpan { - self.peek_nth_token_no_skip(0) - } - - /// Return nth token, possibly whitespace, that has not yet been processed. - pub fn peek_nth_token_no_skip(&self, n: usize) -> TokenWithSpan { - self.tokens - .get(self.index + n) - .cloned() - .unwrap_or(TokenWithSpan { - token: Token::EOF, - span: Span::empty(), - }) - } - - /// Look for all of the expected keywords in sequence, without consuming them - fn peek_keywords(&mut self, expected: &[Keyword]) -> bool { - let index = self.index; - let matched = self.parse_keywords(expected); - self.index = index; - matched - } - - /// Return the first non-whitespace token that has not yet been processed - /// (or None if reached end-of-file) and mark it as processed. OK to call - /// repeatedly after reaching EOF. - pub fn next_token(&mut self) -> TokenWithSpan { - loop { - self.index += 1; - match self.tokens.get(self.index - 1) { - Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) => continue, - token => { - return token - .cloned() - .unwrap_or_else(|| TokenWithSpan::wrap(Token::EOF)) - } - } - } - } - - /// Return the first unprocessed token, possibly whitespace. - pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> { - self.index += 1; - self.tokens.get(self.index - 1) - } - - /// Push back the last one non-whitespace token. Must be called after - /// `next_token()`, otherwise might panic. OK to call after - /// `next_token()` indicates an EOF. - pub fn prev_token(&mut self) { - loop { - assert!(self.index > 0); - self.index -= 1; - if let Some(TokenWithSpan { - token: Token::Whitespace(_), - span: _, - }) = self.tokens.get(self.index) - { - continue; - } - return; - } - } - - /// Report `found` was encountered instead of `expected` - pub fn expected(&self, expected: &str, found: TokenWithSpan) -> Result { - parser_err!( - format!("Expected: {expected}, found: {found}"), - found.span.start - ) - } - - /// If the current token is the `expected` keyword, consume it and returns - /// true. Otherwise, no tokens are consumed and returns false. 
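The keyword helpers below are the building blocks most of the statement parsers are written in terms of; their consume-on-match contract looks like this in practice (a small sketch in the spirit of the `peek_tokens` doctest above):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::keywords::Keyword;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};
    let mut parser = Parser::new(&dialect).try_with_sql("ORDER BY foo").unwrap();
    assert!(parser.parse_keyword(Keyword::ORDER)); // matched, so ORDER is consumed
    assert!(!parser.parse_keyword(Keyword::GROUP)); // no match, nothing consumed
    assert!(parser.parse_keywords(&[Keyword::BY])); // keyword sequences behave the same way
}
```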
- #[must_use] - pub fn parse_keyword(&mut self, expected: Keyword) -> bool { - self.parse_keyword_token(expected).is_some() - } - - #[must_use] - pub fn parse_keyword_token(&mut self, expected: Keyword) -> Option { - match self.peek_token().token { - Token::Word(w) if expected == w.keyword => Some(self.next_token()), - _ => None, - } - } - - #[must_use] - pub fn peek_keyword(&mut self, expected: Keyword) -> bool { - matches!(self.peek_token().token, Token::Word(w) if expected == w.keyword) - } - - /// If the current token is the `expected` keyword followed by - /// specified tokens, consume them and returns true. - /// Otherwise, no tokens are consumed and returns false. - /// - /// Note that if the length of `tokens` is too long, this function will - /// not be efficient as it does a loop on the tokens with `peek_nth_token` - /// each time. - pub fn parse_keyword_with_tokens(&mut self, expected: Keyword, tokens: &[Token]) -> bool { - match self.peek_token().token { - Token::Word(w) if expected == w.keyword => { - for (idx, token) in tokens.iter().enumerate() { - if self.peek_nth_token(idx + 1).token != *token { - return false; - } - } - // consume all tokens - for _ in 0..(tokens.len() + 1) { - self.next_token(); - } - true - } - _ => false, - } - } - - /// If the current and subsequent tokens exactly match the `keywords` - /// sequence, consume them and returns true. Otherwise, no tokens are - /// consumed and returns false - #[must_use] - pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool { - let index = self.index; - for &keyword in keywords { - if !self.parse_keyword(keyword) { - // println!("parse_keywords aborting .. did not find {:?}", keyword); - // reset index and return immediately - self.index = index; - return false; - } - } - true - } - - /// If the current token is one of the given `keywords`, consume the token - /// and return the keyword that matches. Otherwise, no tokens are consumed - /// and returns [`None`]. - #[must_use] - pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option { - match self.peek_token().token { - Token::Word(w) => { - keywords - .iter() - .find(|keyword| **keyword == w.keyword) - .map(|keyword| { - self.next_token(); - *keyword - }) - } - _ => None, - } - } - - /// If the current token is one of the expected keywords, consume the token - /// and return the keyword that matches. Otherwise, return an error. - pub fn expect_one_of_keywords(&mut self, keywords: &[Keyword]) -> Result { - if let Some(keyword) = self.parse_one_of_keywords(keywords) { - Ok(keyword) - } else { - let keywords: Vec = keywords.iter().map(|x| format!("{x:?}")).collect(); - self.expected( - &format!("one of {}", keywords.join(" or ")), - self.peek_token(), - ) - } - } - - /// If the current token is the `expected` keyword, consume the token. - /// Otherwise, return an error. - pub fn expect_keyword(&mut self, expected: Keyword) -> Result { - if let Some(token) = self.parse_keyword_token(expected) { - Ok(token) - } else { - self.expected(format!("{:?}", &expected).as_str(), self.peek_token()) - } - } - - /// If the current and subsequent tokens exactly match the `keywords` - /// sequence, consume them and returns Ok. Otherwise, return an Error. 
- pub fn expect_keywords(&mut self, expected: &[Keyword]) -> Result<(), ParserError> { - for &kw in expected { - self.expect_keyword(kw)?; - } - Ok(()) - } - - /// Consume the next token if it matches the expected token, otherwise return false - #[must_use] - pub fn consume_token(&mut self, expected: &Token) -> bool { - if self.peek_token() == *expected { - self.next_token(); - true - } else { - false - } - } - - /// If the current and subsequent tokens exactly match the `tokens` - /// sequence, consume them and returns true. Otherwise, no tokens are - /// consumed and returns false - #[must_use] - pub fn consume_tokens(&mut self, tokens: &[Token]) -> bool { - let index = self.index; - for token in tokens { - if !self.consume_token(token) { - self.index = index; - return false; - } - } - true - } - - /// Bail out if the current token is not an expected keyword, or consume it if it is - pub fn expect_token(&mut self, expected: &Token) -> Result { - if self.peek_token() == *expected { - Ok(self.next_token()) - } else { - self.expected(&expected.to_string(), self.peek_token()) - } - } - - fn parse(s: String, loc: Location) -> Result - where - ::Err: Display, - { - s.parse::().map_err(|e| { - ParserError::ParserError(format!( - "Could not parse '{s}' as {}: {e}{loc}", - core::any::type_name::() - )) - }) - } - - /// Parse a comma-separated list of 1+ SelectItem - pub fn parse_projection(&mut self) -> Result, ParserError> { - // BigQuery and Snowflake allow trailing commas, but only in project lists - // e.g. `SELECT 1, 2, FROM t` - // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas - // https://docs.snowflake.com/en/release-notes/2024/8_11#select-supports-trailing-commas - - let trailing_commas = - self.options.trailing_commas | self.dialect.supports_projection_trailing_commas(); - - self.parse_comma_separated_with_trailing_commas(|p| p.parse_select_item(), trailing_commas) - } - - pub fn parse_actions_list(&mut self) -> Result, ParserError> { - let mut values = vec![]; - loop { - values.push(self.parse_grant_permission()?); - if !self.consume_token(&Token::Comma) { - break; - } else if self.options.trailing_commas { - match self.peek_token().token { - Token::Word(kw) if kw.keyword == Keyword::ON => { - break; - } - Token::RParen - | Token::SemiColon - | Token::EOF - | Token::RBracket - | Token::RBrace => break, - _ => continue, - } - } - } - Ok(values) - } - - /// Parse the comma of a comma-separated syntax element. - /// Allows for control over trailing commas - /// Returns true if there is a next element - fn is_parse_comma_separated_end_with_trailing_commas(&mut self, trailing_commas: bool) -> bool { - if !self.consume_token(&Token::Comma) { - true - } else if trailing_commas { - let token = self.peek_token().token; - match token { - Token::Word(ref kw) - if keywords::RESERVED_FOR_COLUMN_ALIAS.contains(&kw.keyword) => - { - true - } - Token::RParen | Token::SemiColon | Token::EOF | Token::RBracket | Token::RBrace => { - true - } - _ => false, - } - } else { - false - } - } - - /// Parse the comma of a comma-separated syntax element. 
- /// Returns true if there is a next element - fn is_parse_comma_separated_end(&mut self) -> bool { - self.is_parse_comma_separated_end_with_trailing_commas(self.options.trailing_commas) - } - - /// Parse a comma-separated list of 1+ items accepted by `F` - pub fn parse_comma_separated(&mut self, f: F) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { - self.parse_comma_separated_with_trailing_commas(f, self.options.trailing_commas) - } - - /// Parse a comma-separated list of 1+ items accepted by `F` - /// Allows for control over trailing commas - fn parse_comma_separated_with_trailing_commas( - &mut self, - mut f: F, - trailing_commas: bool, - ) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { - let mut values = vec![]; - loop { - values.push(f(self)?); - if self.is_parse_comma_separated_end_with_trailing_commas(trailing_commas) { - break; - } - } - Ok(values) - } - - /// Parse a keyword-separated list of 1+ items accepted by `F` - pub fn parse_keyword_separated( - &mut self, - keyword: Keyword, - mut f: F, - ) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { - let mut values = vec![]; - loop { - values.push(f(self)?); - if !self.parse_keyword(keyword) { - break; - } - } - Ok(values) - } - - pub fn parse_parenthesized(&mut self, mut f: F) -> Result - where - F: FnMut(&mut Parser<'a>) -> Result, - { - self.expect_token(&Token::LParen)?; - let res = f(self)?; - self.expect_token(&Token::RParen)?; - Ok(res) - } - - /// Parse a comma-separated list of 0+ items accepted by `F` - /// * `end_token` - expected end token for the closure (e.g. [Token::RParen], [Token::RBrace] ...) - pub fn parse_comma_separated0( - &mut self, - f: F, - end_token: Token, - ) -> Result, ParserError> - where - F: FnMut(&mut Parser<'a>) -> Result, - { - if self.peek_token().token == end_token { - return Ok(vec![]); - } - - if self.options.trailing_commas && self.peek_tokens() == [Token::Comma, end_token] { - let _ = self.consume_token(&Token::Comma); - return Ok(vec![]); - } - - self.parse_comma_separated(f) - } - - /// Run a parser method `f`, reverting back to the current position if unsuccessful. - /// Returns `None` if `f` returns an error - pub fn maybe_parse(&mut self, f: F) -> Result, ParserError> - where - F: FnMut(&mut Parser) -> Result, - { - match self.try_parse(f) { - Ok(t) => Ok(Some(t)), - Err(ParserError::RecursionLimitExceeded) => Err(ParserError::RecursionLimitExceeded), - _ => Ok(None), - } - } - - /// Run a parser method `f`, reverting back to the current position if unsuccessful. - pub fn try_parse(&mut self, mut f: F) -> Result - where - F: FnMut(&mut Parser) -> Result, - { - let index = self.index; - match f(self) { - Ok(t) => Ok(t), - Err(e) => { - // Unwind stack if limit exceeded - self.index = index; - Err(e) - } - } - } - - /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed - /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found. 
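`parse_all_or_distinct` below is easy to exercise from the outside; a minimal sketch (PostgreSQL supports `DISTINCT ON`, and the table and column names are illustrative):

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = PostgreSqlDialect {};
    // Produces Distinct::On with the parenthesized expressions.
    Parser::parse_sql(&dialect, "SELECT DISTINCT ON (a, b) a, b, c FROM t").unwrap();
    // Specifying both ALL and DISTINCT is rejected with a ParserError.
    assert!(Parser::parse_sql(&dialect, "SELECT ALL DISTINCT a FROM t").is_err());
}
```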
- pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { - let loc = self.peek_token().span.start; - let all = self.parse_keyword(Keyword::ALL); - let distinct = self.parse_keyword(Keyword::DISTINCT); - if !distinct { - return Ok(None); - } - if all { - return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc); - } - let on = self.parse_keyword(Keyword::ON); - if !on { - return Ok(Some(Distinct::Distinct)); - } - - self.expect_token(&Token::LParen)?; - let col_names = if self.consume_token(&Token::RParen) { - self.prev_token(); - Vec::new() - } else { - self.parse_comma_separated(Parser::parse_expr)? - }; - self.expect_token(&Token::RParen)?; - Ok(Some(Distinct::On(col_names))) - } - - /// Parse a SQL CREATE statement - pub fn parse_create(&mut self) -> Result { - let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); - let or_alter = self.parse_keywords(&[Keyword::OR, Keyword::ALTER]); - let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); - let global = self.parse_one_of_keywords(&[Keyword::GLOBAL]).is_some(); - let transient = self.parse_one_of_keywords(&[Keyword::TRANSIENT]).is_some(); - let global: Option = if global { - Some(true) - } else if local { - Some(false) - } else { - None - }; - let temporary = self - .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) - .is_some(); - let persistent = dialect_of!(self is DuckDbDialect) - && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); - if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace, temporary, global, transient) - } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { - self.prev_token(); - self.parse_create_view(or_replace, temporary) - } else if self.parse_keyword(Keyword::POLICY) { - self.parse_create_policy() - } else if self.parse_keyword(Keyword::EXTERNAL) { - self.parse_create_external_table(or_replace) - } else if self.parse_keyword(Keyword::FUNCTION) { - self.parse_create_function(or_replace, temporary) - } else if self.parse_keyword(Keyword::TRIGGER) { - self.parse_create_trigger(or_replace, false) - } else if self.parse_keywords(&[Keyword::CONSTRAINT, Keyword::TRIGGER]) { - self.parse_create_trigger(or_replace, true) - } else if self.parse_keyword(Keyword::MACRO) { - self.parse_create_macro(or_replace, temporary) - } else if self.parse_keyword(Keyword::SECRET) { - self.parse_create_secret(or_replace, temporary, persistent) - } else if or_replace { - self.expected( - "[EXTERNAL] TABLE or [MATERIALIZED] VIEW or FUNCTION after CREATE OR REPLACE", - self.peek_token(), - ) - } else if self.parse_keyword(Keyword::EXTENSION) { - self.parse_create_extension() - } else if self.parse_keyword(Keyword::INDEX) { - self.parse_create_index(false) - } else if self.parse_keywords(&[Keyword::UNIQUE, Keyword::INDEX]) { - self.parse_create_index(true) - } else if self.parse_keyword(Keyword::VIRTUAL) { - self.parse_create_virtual_table() - } else if self.parse_keyword(Keyword::SCHEMA) { - self.parse_create_schema() - } else if self.parse_keyword(Keyword::DATABASE) { - self.parse_create_database() - } else if self.parse_keyword(Keyword::ROLE) { - self.parse_create_role() - } else if self.parse_keyword(Keyword::SEQUENCE) { - self.parse_create_sequence(temporary) - } else if self.parse_keyword(Keyword::TYPE) { - self.parse_create_type() - } else if self.parse_keyword(Keyword::PROCEDURE) { - self.parse_create_procedure(or_alter) - } else { - self.expected("an object type after CREATE", 
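For context, `parse_all_or_distinct` is what rejects the contradictory quantifier pair while still accepting `DISTINCT ON (...)`. A small sketch of the observable behavior, assuming the public `Parser::parse_sql` helper (schema names invented):

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = PostgreSqlDialect {};

    // DISTINCT ON (...) parses into Distinct::On with the listed expressions.
    let sql = "SELECT DISTINCT ON (dept) dept, salary FROM emp";
    assert!(Parser::parse_sql(&dialect, sql).is_ok());

    // ALL and DISTINCT together is rejected, as the check above requires.
    assert!(Parser::parse_sql(&dialect, "SELECT ALL DISTINCT dept FROM emp").is_err());
}
```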
self.peek_token()) - } - } - - /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. - pub fn parse_create_secret( - &mut self, - or_replace: bool, - temporary: bool, - persistent: bool, - ) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - - let mut storage_specifier = None; - let mut name = None; - if self.peek_token() != Token::LParen { - if self.parse_keyword(Keyword::IN) { - storage_specifier = self.parse_identifier(false).ok() - } else { - name = self.parse_identifier(false).ok(); - } - - // Storage specifier may follow the name - if storage_specifier.is_none() - && self.peek_token() != Token::LParen - && self.parse_keyword(Keyword::IN) - { - storage_specifier = self.parse_identifier(false).ok(); - } - } - - self.expect_token(&Token::LParen)?; - self.expect_keyword(Keyword::TYPE)?; - let secret_type = self.parse_identifier(false)?; - - let mut options = Vec::new(); - if self.consume_token(&Token::Comma) { - options.append(&mut self.parse_comma_separated(|p| { - let key = p.parse_identifier(false)?; - let value = p.parse_identifier(false)?; - Ok(SecretOption { key, value }) - })?); - } - self.expect_token(&Token::RParen)?; - - let temp = match (temporary, persistent) { - (true, false) => Some(true), - (false, true) => Some(false), - (false, false) => None, - _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, - }; - - Ok(Statement::CreateSecret { - or_replace, - temporary: temp, - if_not_exists, - name, - storage_specifier, - secret_type, - options, - }) - } - - /// Parse a CACHE TABLE statement - pub fn parse_cache_table(&mut self) -> Result { - let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None); - if self.parse_keyword(Keyword::TABLE) { - let table_name = self.parse_object_name(false)?; - if self.peek_token().token != Token::EOF { - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = self.parse_options(Keyword::OPTIONS)? - } - }; - - if self.peek_token().token != Token::EOF { - let (a, q) = self.parse_as_query()?; - has_as = a; - query = Some(q); - } - - Ok(Statement::Cache { - table_flag, - table_name, - has_as, - options, - query, - }) - } else { - Ok(Statement::Cache { - table_flag, - table_name, - has_as, - options, - query, - }) - } - } else { - table_flag = Some(self.parse_object_name(false)?); - if self.parse_keyword(Keyword::TABLE) { - let table_name = self.parse_object_name(false)?; - if self.peek_token() != Token::EOF { - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = self.parse_options(Keyword::OPTIONS)? 
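A sketch of the DuckDB secret syntax this accepts, with an invented secret and storage name; both the name and the `IN` storage specifier are optional, and `TYPE` is mandatory:

```rust
use sqlparser::dialect::DuckDbDialect;
use sqlparser::parser::Parser;

fn main() {
    // PERSISTENT maps to temporary = Some(false) in the resulting statement.
    let sql = "CREATE OR REPLACE PERSISTENT SECRET my_secret IN my_storage \
               (TYPE S3, PROVIDER CREDENTIAL_CHAIN)";
    let stmts = Parser::parse_sql(&DuckDbDialect {}, sql).unwrap();
    println!("{stmts:?}");
}
```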
- } - }; - - if self.peek_token() != Token::EOF { - let (a, q) = self.parse_as_query()?; - has_as = a; - query = Some(q); - } - - Ok(Statement::Cache { - table_flag, - table_name, - has_as, - options, - query, - }) - } else { - Ok(Statement::Cache { - table_flag, - table_name, - has_as, - options, - query, - }) - } - } else { - if self.peek_token() == Token::EOF { - self.prev_token(); - } - self.expected("a `TABLE` keyword", self.peek_token()) - } - } - } - - /// Parse 'AS' before as query,such as `WITH XXX AS SELECT XXX` oer `CACHE TABLE AS SELECT XXX` - pub fn parse_as_query(&mut self) -> Result<(bool, Box), ParserError> { - match self.peek_token().token { - Token::Word(word) => match word.keyword { - Keyword::AS => { - self.next_token(); - Ok((true, self.parse_query()?)) - } - _ => Ok((false, self.parse_query()?)), - }, - _ => self.expected("a QUERY statement", self.peek_token()), - } - } - - /// Parse a UNCACHE TABLE statement - pub fn parse_uncache_table(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let table_name = self.parse_object_name(false)?; - Ok(Statement::UNCache { - table_name, - if_exists, - }) - } - - /// SQLite-specific `CREATE VIRTUAL TABLE` - pub fn parse_create_virtual_table(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::USING)?; - let module_name = self.parse_identifier(false)?; - // SQLite docs note that module "arguments syntax is sufficiently - // general that the arguments can be made to appear as column - // definitions in a traditional CREATE TABLE statement", but - // we don't implement that. - let module_args = self.parse_parenthesized_column_list(Optional, false)?; - Ok(Statement::CreateVirtualTable { - name: table_name, - if_not_exists, - module_name, - module_args, - }) - } - - pub fn parse_create_schema(&mut self) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - - let schema_name = self.parse_schema_name()?; - - Ok(Statement::CreateSchema { - schema_name, - if_not_exists, - }) - } - - fn parse_schema_name(&mut self) -> Result { - if self.parse_keyword(Keyword::AUTHORIZATION) { - Ok(SchemaName::UnnamedAuthorization( - self.parse_identifier(false)?, - )) - } else { - let name = self.parse_object_name(false)?; - - if self.parse_keyword(Keyword::AUTHORIZATION) { - Ok(SchemaName::NamedAuthorization( - name, - self.parse_identifier(false)?, - )) - } else { - Ok(SchemaName::Simple(name)) - } - } - } - - pub fn parse_create_database(&mut self) -> Result { - let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let db_name = self.parse_object_name(false)?; - let mut location = None; - let mut managed_location = None; - loop { - match self.parse_one_of_keywords(&[Keyword::LOCATION, Keyword::MANAGEDLOCATION]) { - Some(Keyword::LOCATION) => location = Some(self.parse_literal_string()?), - Some(Keyword::MANAGEDLOCATION) => { - managed_location = Some(self.parse_literal_string()?) 
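The Spark-style cache statements round-trip like this; a sketch with invented table names, assuming the public `Parser::parse_sql` helper:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = GenericDialect {};

    // An optional table flag, an OPTIONS list, and a trailing query.
    let sql = "CACHE LAZY TABLE t OPTIONS ('storageLevel' = 'DISK_ONLY') AS SELECT 1";
    assert!(Parser::parse_sql(&dialect, sql).is_ok());

    // UNCACHE requires the TABLE keyword and allows IF EXISTS.
    assert!(Parser::parse_sql(&dialect, "UNCACHE TABLE IF EXISTS t").is_ok());
}
```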
- } - _ => break, - } - } - Ok(Statement::CreateDatabase { - db_name, - if_not_exists: ine, - location, - managed_location, - }) - } - - pub fn parse_optional_create_function_using( - &mut self, - ) -> Result, ParserError> { - if !self.parse_keyword(Keyword::USING) { - return Ok(None); - }; - let keyword = - self.expect_one_of_keywords(&[Keyword::JAR, Keyword::FILE, Keyword::ARCHIVE])?; - - let uri = self.parse_literal_string()?; - - match keyword { - Keyword::JAR => Ok(Some(CreateFunctionUsing::Jar(uri))), - Keyword::FILE => Ok(Some(CreateFunctionUsing::File(uri))), - Keyword::ARCHIVE => Ok(Some(CreateFunctionUsing::Archive(uri))), - _ => self.expected( - "JAR, FILE or ARCHIVE, got {:?}", - TokenWithSpan::wrap(Token::make_keyword(format!("{keyword:?}").as_str())), - ), - } - } - - pub fn parse_create_function( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - if dialect_of!(self is HiveDialect) { - self.parse_hive_create_function(or_replace, temporary) - } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) { - self.parse_postgres_create_function(or_replace, temporary) - } else if dialect_of!(self is DuckDbDialect) { - self.parse_create_macro(or_replace, temporary) - } else if dialect_of!(self is BigQueryDialect) { - self.parse_bigquery_create_function(or_replace, temporary) - } else { - self.prev_token(); - self.expected("an object type after CREATE", self.peek_token()) - } - } - - /// Parse `CREATE FUNCTION` for [Postgres] - /// - /// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html - fn parse_postgres_create_function( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - let name = self.parse_object_name(false)?; - self.expect_token(&Token::LParen)?; - let args = if self.consume_token(&Token::RParen) { - self.prev_token(); - None - } else { - Some(self.parse_comma_separated(Parser::parse_function_arg)?) - }; - - self.expect_token(&Token::RParen)?; - - let return_type = if self.parse_keyword(Keyword::RETURNS) { - Some(self.parse_data_type()?) 
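The optional `USING { JAR | FILE | ARCHIVE }` clause parsed above shows up in Hive-style function definitions; an illustrative sketch (class name and URI invented):

```rust
use sqlparser::dialect::HiveDialect;
use sqlparser::parser::Parser;

fn main() {
    // A Hive function is a class-name string plus an optional USING clause.
    let sql = "CREATE TEMPORARY FUNCTION mydb.myfunc AS 'org.example.MyUDF' \
               USING JAR 'hdfs://tmp/udf.jar'";
    let stmts = Parser::parse_sql(&HiveDialect {}, sql).unwrap();
    println!("{stmts:?}");
}
```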
- } else { - None - }; - - #[derive(Default)] - struct Body { - language: Option, - behavior: Option, - function_body: Option, - called_on_null: Option, - parallel: Option, - } - let mut body = Body::default(); - loop { - fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { - if field.is_some() { - return Err(ParserError::ParserError(format!( - "{name} specified more than once", - ))); - } - Ok(()) - } - if self.parse_keyword(Keyword::AS) { - ensure_not_set(&body.function_body, "AS")?; - body.function_body = Some(CreateFunctionBody::AsBeforeOptions( - self.parse_create_function_body_string()?, - )); - } else if self.parse_keyword(Keyword::LANGUAGE) { - ensure_not_set(&body.language, "LANGUAGE")?; - body.language = Some(self.parse_identifier(false)?); - } else if self.parse_keyword(Keyword::IMMUTABLE) { - ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; - body.behavior = Some(FunctionBehavior::Immutable); - } else if self.parse_keyword(Keyword::STABLE) { - ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; - body.behavior = Some(FunctionBehavior::Stable); - } else if self.parse_keyword(Keyword::VOLATILE) { - ensure_not_set(&body.behavior, "IMMUTABLE | STABLE | VOLATILE")?; - body.behavior = Some(FunctionBehavior::Volatile); - } else if self.parse_keywords(&[ - Keyword::CALLED, - Keyword::ON, - Keyword::NULL, - Keyword::INPUT, - ]) { - ensure_not_set( - &body.called_on_null, - "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", - )?; - body.called_on_null = Some(FunctionCalledOnNull::CalledOnNullInput); - } else if self.parse_keywords(&[ - Keyword::RETURNS, - Keyword::NULL, - Keyword::ON, - Keyword::NULL, - Keyword::INPUT, - ]) { - ensure_not_set( - &body.called_on_null, - "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", - )?; - body.called_on_null = Some(FunctionCalledOnNull::ReturnsNullOnNullInput); - } else if self.parse_keyword(Keyword::STRICT) { - ensure_not_set( - &body.called_on_null, - "CALLED ON NULL INPUT | RETURNS NULL ON NULL INPUT | STRICT", - )?; - body.called_on_null = Some(FunctionCalledOnNull::Strict); - } else if self.parse_keyword(Keyword::PARALLEL) { - ensure_not_set(&body.parallel, "PARALLEL { UNSAFE | RESTRICTED | SAFE }")?; - if self.parse_keyword(Keyword::UNSAFE) { - body.parallel = Some(FunctionParallel::Unsafe); - } else if self.parse_keyword(Keyword::RESTRICTED) { - body.parallel = Some(FunctionParallel::Restricted); - } else if self.parse_keyword(Keyword::SAFE) { - body.parallel = Some(FunctionParallel::Safe); - } else { - return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); - } - } else if self.parse_keyword(Keyword::RETURN) { - ensure_not_set(&body.function_body, "RETURN")?; - body.function_body = Some(CreateFunctionBody::Return(self.parse_expr()?)); - } else { - break; - } - } - - Ok(Statement::CreateFunction(CreateFunction { - or_replace, - temporary, - name, - args, - return_type, - behavior: body.behavior, - called_on_null: body.called_on_null, - parallel: body.parallel, - language: body.language, - function_body: body.function_body, - if_not_exists: false, - using: None, - determinism_specifier: None, - options: None, - remote_connection: None, - })) - } - - /// Parse `CREATE FUNCTION` for [Hive] - /// - /// [Hive]: https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction - fn parse_hive_create_function( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - let name = 
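Because the attribute loop above accepts its clauses in any order but at most once each, both halves are worth demonstrating; a sketch with invented function names, assuming `Parser::parse_sql`:

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = PostgreSqlDialect {};

    // Attributes may appear in any order after the signature.
    let sql = "CREATE FUNCTION add(a INTEGER, b INTEGER) RETURNS INTEGER \
               LANGUAGE SQL IMMUTABLE STRICT RETURN a + b";
    assert!(Parser::parse_sql(&dialect, sql).is_ok());

    // Repeating an attribute trips ensure_not_set and fails the parse.
    let dup = "CREATE FUNCTION f() RETURNS INT IMMUTABLE STABLE RETURN 1";
    assert!(Parser::parse_sql(&dialect, dup).is_err());
}
```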
self.parse_object_name(false)?; - self.expect_keyword(Keyword::AS)?; - - let as_ = self.parse_create_function_body_string()?; - let using = self.parse_optional_create_function_using()?; - - Ok(Statement::CreateFunction(CreateFunction { - or_replace, - temporary, - name, - function_body: Some(CreateFunctionBody::AsBeforeOptions(as_)), - using, - if_not_exists: false, - args: None, - return_type: None, - behavior: None, - called_on_null: None, - parallel: None, - language: None, - determinism_specifier: None, - options: None, - remote_connection: None, - })) - } - - /// Parse `CREATE FUNCTION` for [BigQuery] - /// - /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement - fn parse_bigquery_create_function( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let name = self.parse_object_name(false)?; - - let parse_function_param = - |parser: &mut Parser| -> Result { - let name = parser.parse_identifier(false)?; - let data_type = parser.parse_data_type()?; - Ok(OperateFunctionArg { - mode: None, - name: Some(name), - data_type, - default_expr: None, - }) - }; - self.expect_token(&Token::LParen)?; - let args = self.parse_comma_separated0(parse_function_param, Token::RParen)?; - self.expect_token(&Token::RParen)?; - - let return_type = if self.parse_keyword(Keyword::RETURNS) { - Some(self.parse_data_type()?) - } else { - None - }; - - let determinism_specifier = if self.parse_keyword(Keyword::DETERMINISTIC) { - Some(FunctionDeterminismSpecifier::Deterministic) - } else if self.parse_keywords(&[Keyword::NOT, Keyword::DETERMINISTIC]) { - Some(FunctionDeterminismSpecifier::NotDeterministic) - } else { - None - }; - - let language = if self.parse_keyword(Keyword::LANGUAGE) { - Some(self.parse_identifier(false)?) - } else { - None - }; - - let remote_connection = - if self.parse_keywords(&[Keyword::REMOTE, Keyword::WITH, Keyword::CONNECTION]) { - Some(self.parse_object_name(false)?) - } else { - None - }; - - // `OPTIONS` may come before of after the function body but - // may be specified at most once. 
- let mut options = self.maybe_parse_options(Keyword::OPTIONS)?; - - let function_body = if remote_connection.is_none() { - self.expect_keyword(Keyword::AS)?; - let expr = self.parse_expr()?; - if options.is_none() { - options = self.maybe_parse_options(Keyword::OPTIONS)?; - Some(CreateFunctionBody::AsBeforeOptions(expr)) - } else { - Some(CreateFunctionBody::AsAfterOptions(expr)) - } - } else { - None - }; - - Ok(Statement::CreateFunction(CreateFunction { - or_replace, - temporary, - if_not_exists, - name, - args: Some(args), - return_type, - function_body, - language, - determinism_specifier, - options, - remote_connection, - using: None, - behavior: None, - called_on_null: None, - parallel: None, - })) - } - - fn parse_function_arg(&mut self) -> Result { - let mode = if self.parse_keyword(Keyword::IN) { - Some(ArgMode::In) - } else if self.parse_keyword(Keyword::OUT) { - Some(ArgMode::Out) - } else if self.parse_keyword(Keyword::INOUT) { - Some(ArgMode::InOut) - } else { - None - }; - - // parse: [ argname ] argtype - let mut name = None; - let mut data_type = self.parse_data_type()?; - if let DataType::Custom(n, _) = &data_type { - // the first token is actually a name - name = Some(n.0[0].clone()); - data_type = self.parse_data_type()?; - } - - let default_expr = if self.parse_keyword(Keyword::DEFAULT) || self.consume_token(&Token::Eq) - { - Some(self.parse_expr()?) - } else { - None - }; - Ok(OperateFunctionArg { - mode, - name, - data_type, - default_expr, - }) - } - - /// Parse statements of the DropTrigger type such as: - /// - /// ```sql - /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] - /// ``` - pub fn parse_drop_trigger(&mut self) -> Result { - if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { - self.prev_token(); - return self.expected("an object type after DROP", self.peek_token()); - } - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let trigger_name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::ON)?; - let table_name = self.parse_object_name(false)?; - let option = self - .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) - .map(|keyword| match keyword { - Keyword::CASCADE => ReferentialAction::Cascade, - Keyword::RESTRICT => ReferentialAction::Restrict, - _ => unreachable!(), - }); - Ok(Statement::DropTrigger { - if_exists, - trigger_name, - table_name, - option, - }) - } - - pub fn parse_create_trigger( - &mut self, - or_replace: bool, - is_constraint: bool, - ) -> Result { - if !dialect_of!(self is PostgreSqlDialect | GenericDialect) { - self.prev_token(); - return self.expected("an object type after CREATE", self.peek_token()); - } - - let name = self.parse_object_name(false)?; - let period = self.parse_trigger_period()?; - - let events = self.parse_keyword_separated(Keyword::OR, Parser::parse_trigger_event)?; - self.expect_keyword(Keyword::ON)?; - let table_name = self.parse_object_name(false)?; - - let referenced_table_name = if self.parse_keyword(Keyword::FROM) { - self.parse_object_name(true).ok() - } else { - None - }; - - let characteristics = self.parse_constraint_characteristics()?; - - let mut referencing = vec![]; - if self.parse_keyword(Keyword::REFERENCING) { - while let Some(refer) = self.parse_trigger_referencing()? { - referencing.push(refer); - } - } - - self.expect_keyword(Keyword::FOR)?; - let include_each = self.parse_keyword(Keyword::EACH); - let trigger_object = - match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? 
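The BigQuery-specific ordering (an `OPTIONS(...)` list before or after the `AS` body, never twice) can be seen with an invented dataset name; a sketch assuming the public `Parser::parse_sql` helper:

```rust
use sqlparser::dialect::BigQueryDialect;
use sqlparser::parser::Parser;

fn main() {
    // OPTIONS given before AS yields CreateFunctionBody::AsAfterOptions.
    let sql = "CREATE OR REPLACE FUNCTION mydataset.double_it(x INT64) \
               RETURNS INT64 OPTIONS(description = 'doubles its input') AS (x * 2)";
    assert!(Parser::parse_sql(&BigQueryDialect {}, sql).is_ok());
}
```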
{ - Keyword::ROW => TriggerObject::Row, - Keyword::STATEMENT => TriggerObject::Statement, - _ => unreachable!(), - }; - - let condition = self - .parse_keyword(Keyword::WHEN) - .then(|| self.parse_expr()) - .transpose()?; - - self.expect_keyword(Keyword::EXECUTE)?; - - let exec_body = self.parse_trigger_exec_body()?; - - Ok(Statement::CreateTrigger { - or_replace, - is_constraint, - name, - period, - events, - table_name, - referenced_table_name, - referencing, - trigger_object, - include_each, - condition, - exec_body, - characteristics, - }) - } - - pub fn parse_trigger_period(&mut self) -> Result { - Ok( - match self.expect_one_of_keywords(&[ - Keyword::BEFORE, - Keyword::AFTER, - Keyword::INSTEAD, - ])? { - Keyword::BEFORE => TriggerPeriod::Before, - Keyword::AFTER => TriggerPeriod::After, - Keyword::INSTEAD => self - .expect_keyword(Keyword::OF) - .map(|_| TriggerPeriod::InsteadOf)?, - _ => unreachable!(), - }, - ) - } - - pub fn parse_trigger_event(&mut self) -> Result { - Ok( - match self.expect_one_of_keywords(&[ - Keyword::INSERT, - Keyword::UPDATE, - Keyword::DELETE, - Keyword::TRUNCATE, - ])? { - Keyword::INSERT => TriggerEvent::Insert, - Keyword::UPDATE => { - if self.parse_keyword(Keyword::OF) { - let cols = self.parse_comma_separated(|ident| { - Parser::parse_identifier(ident, false) - })?; - TriggerEvent::Update(cols) - } else { - TriggerEvent::Update(vec![]) - } - } - Keyword::DELETE => TriggerEvent::Delete, - Keyword::TRUNCATE => TriggerEvent::Truncate, - _ => unreachable!(), - }, - ) - } - - pub fn parse_trigger_referencing(&mut self) -> Result, ParserError> { - let refer_type = match self.parse_one_of_keywords(&[Keyword::OLD, Keyword::NEW]) { - Some(Keyword::OLD) if self.parse_keyword(Keyword::TABLE) => { - TriggerReferencingType::OldTable - } - Some(Keyword::NEW) if self.parse_keyword(Keyword::TABLE) => { - TriggerReferencingType::NewTable - } - _ => { - return Ok(None); - } - }; - - let is_as = self.parse_keyword(Keyword::AS); - let transition_relation_name = self.parse_object_name(false)?; - Ok(Some(TriggerReferencing { - refer_type, - is_as, - transition_relation_name, - })) - } - - pub fn parse_trigger_exec_body(&mut self) -> Result { - Ok(TriggerExecBody { - exec_type: match self - .expect_one_of_keywords(&[Keyword::FUNCTION, Keyword::PROCEDURE])? - { - Keyword::FUNCTION => TriggerExecBodyType::Function, - Keyword::PROCEDURE => TriggerExecBodyType::Procedure, - _ => unreachable!(), - }, - func_desc: self.parse_function_desc()?, - }) - } - - pub fn parse_create_macro( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - if dialect_of!(self is DuckDbDialect | GenericDialect) { - let name = self.parse_object_name(false)?; - self.expect_token(&Token::LParen)?; - let args = if self.consume_token(&Token::RParen) { - self.prev_token(); - None - } else { - Some(self.parse_comma_separated(Parser::parse_macro_arg)?) - }; - - self.expect_token(&Token::RParen)?; - self.expect_keyword(Keyword::AS)?; - - Ok(Statement::CreateMacro { - or_replace, - temporary, - name, - args, - definition: if self.parse_keyword(Keyword::TABLE) { - MacroDefinition::Table(self.parse_query()?) - } else { - MacroDefinition::Expr(self.parse_expr()?) - }, - }) - } else { - self.prev_token(); - self.expected("an object type after CREATE", self.peek_token()) - } - } - - fn parse_macro_arg(&mut self) -> Result { - let name = self.parse_identifier(false)?; - - let default_expr = - if self.consume_token(&Token::Assignment) || self.consume_token(&Token::RArrow) { - Some(self.parse_expr()?) 
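Putting the trigger helpers together, a statement such as the following (invented table and function names) exercises the period, the per-column event list, the granularity, and the exec body in one go:

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let sql = "CREATE TRIGGER check_update BEFORE UPDATE OF balance ON accounts \
               FOR EACH ROW EXECUTE FUNCTION check_account_update()";
    let stmts = Parser::parse_sql(&PostgreSqlDialect {}, sql).unwrap();
    println!("{stmts:?}");
}
```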
- } else { - None - }; - Ok(MacroArg { name, default_expr }) - } - - pub fn parse_create_external_table( - &mut self, - or_replace: bool, - ) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parse_object_name(false)?; - let (columns, constraints) = self.parse_columns()?; - - let hive_distribution = self.parse_hive_distribution()?; - let hive_formats = self.parse_hive_formats()?; - - let file_format = if let Some(ff) = &hive_formats.storage { - match ff { - HiveIOFormat::FileFormat { format } => Some(*format), - _ => None, - } - } else { - None - }; - let location = hive_formats.location.clone(); - let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; - Ok(CreateTableBuilder::new(table_name) - .columns(columns) - .constraints(constraints) - .hive_distribution(hive_distribution) - .hive_formats(Some(hive_formats)) - .table_properties(table_properties) - .or_replace(or_replace) - .if_not_exists(if_not_exists) - .external(true) - .file_format(file_format) - .location(location) - .build()) - } - - pub fn parse_file_format(&mut self) -> Result { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::AVRO => Ok(FileFormat::AVRO), - Keyword::JSONFILE => Ok(FileFormat::JSONFILE), - Keyword::ORC => Ok(FileFormat::ORC), - Keyword::PARQUET => Ok(FileFormat::PARQUET), - Keyword::RCFILE => Ok(FileFormat::RCFILE), - Keyword::SEQUENCEFILE => Ok(FileFormat::SEQUENCEFILE), - Keyword::TEXTFILE => Ok(FileFormat::TEXTFILE), - _ => self.expected("fileformat", next_token), - }, - _ => self.expected("fileformat", next_token), - } - } - - pub fn parse_analyze_format(&mut self) -> Result { - let next_token = self.next_token(); - match &next_token.token { - Token::Word(w) => match w.keyword { - Keyword::TEXT => Ok(AnalyzeFormat::TEXT), - Keyword::GRAPHVIZ => Ok(AnalyzeFormat::GRAPHVIZ), - Keyword::JSON => Ok(AnalyzeFormat::JSON), - _ => self.expected("fileformat", next_token), - }, - _ => self.expected("fileformat", next_token), - } - } - - pub fn parse_create_view( - &mut self, - or_replace: bool, - temporary: bool, - ) -> Result { - let materialized = self.parse_keyword(Keyword::MATERIALIZED); - self.expect_keyword(Keyword::VIEW)?; - let if_not_exists = dialect_of!(self is BigQueryDialect|SQLiteDialect|GenericDialect) - && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). - // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. - let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); - let name = self.parse_object_name(allow_unquoted_hyphen)?; - let columns = self.parse_view_columns()?; - let mut options = CreateTableOptions::None; - let with_options = self.parse_options(Keyword::WITH)?; - if !with_options.is_empty() { - options = CreateTableOptions::With(with_options); - } - - let cluster_by = if self.parse_keyword(Keyword::CLUSTER) { - self.expect_keyword(Keyword::BY)?; - self.parse_parenthesized_column_list(Optional, false)? - } else { - vec![] - }; - - if dialect_of!(self is BigQueryDialect | GenericDialect) { - if let Some(opts) = self.maybe_parse_options(Keyword::OPTIONS)? 
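A sketch of the external-table path, borrowing the shape of the crate's own examples (table and column names invented):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    // Columns, then the Hive storage format and location parsed above.
    let sql = "CREATE EXTERNAL TABLE uk_cities (name VARCHAR(100), lat DOUBLE) \
               STORED AS TEXTFILE LOCATION '/tmp/example.csv'";
    assert!(Parser::parse_sql(&GenericDialect {}, sql).is_ok());
}
```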
{ - if !opts.is_empty() { - options = CreateTableOptions::Options(opts); - } - }; - } - - let to = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keyword(Keyword::TO) - { - Some(self.parse_object_name(false)?) - } else { - None - }; - - let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) - && self.parse_keyword(Keyword::COMMENT) - { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(str) => Some(str), - _ => self.expected("string literal", next_token)?, - } - } else { - None - }; - - self.expect_keyword(Keyword::AS)?; - let query = self.parse_query()?; - // Optional `WITH [ CASCADED | LOCAL ] CHECK OPTION` is widely supported here. - - let with_no_schema_binding = dialect_of!(self is RedshiftSqlDialect | GenericDialect) - && self.parse_keywords(&[ - Keyword::WITH, - Keyword::NO, - Keyword::SCHEMA, - Keyword::BINDING, - ]); - - Ok(Statement::CreateView { - name, - columns, - query, - materialized, - or_replace, - options, - cluster_by, - comment, - with_no_schema_binding, - if_not_exists, - temporary, - to, - }) - } - - pub fn parse_create_role(&mut self) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; - - let _ = self.parse_keyword(Keyword::WITH); // [ WITH ] - - let optional_keywords = if dialect_of!(self is MsSqlDialect) { - vec![Keyword::AUTHORIZATION] - } else if dialect_of!(self is PostgreSqlDialect) { - vec![ - Keyword::LOGIN, - Keyword::NOLOGIN, - Keyword::INHERIT, - Keyword::NOINHERIT, - Keyword::BYPASSRLS, - Keyword::NOBYPASSRLS, - Keyword::PASSWORD, - Keyword::CREATEDB, - Keyword::NOCREATEDB, - Keyword::CREATEROLE, - Keyword::NOCREATEROLE, - Keyword::SUPERUSER, - Keyword::NOSUPERUSER, - Keyword::REPLICATION, - Keyword::NOREPLICATION, - Keyword::CONNECTION, - Keyword::VALID, - Keyword::IN, - Keyword::ROLE, - Keyword::ADMIN, - Keyword::USER, - ] - } else { - vec![] - }; - - // MSSQL - let mut authorization_owner = None; - // Postgres - let mut login = None; - let mut inherit = None; - let mut bypassrls = None; - let mut password = None; - let mut create_db = None; - let mut create_role = None; - let mut superuser = None; - let mut replication = None; - let mut connection_limit = None; - let mut valid_until = None; - let mut in_role = vec![]; - let mut in_group = vec![]; - let mut role = vec![]; - let mut user = vec![]; - let mut admin = vec![]; - - while let Some(keyword) = self.parse_one_of_keywords(&optional_keywords) { - let loc = self - .tokens - .get(self.index - 1) - .map_or(Location { line: 0, column: 0 }, |t| t.span.start); - match keyword { - Keyword::AUTHORIZATION => { - if authorization_owner.is_some() { - parser_err!("Found multiple AUTHORIZATION", loc) - } else { - authorization_owner = Some(self.parse_object_name(false)?); - Ok(()) - } - } - Keyword::LOGIN | Keyword::NOLOGIN => { - if login.is_some() { - parser_err!("Found multiple LOGIN or NOLOGIN", loc) - } else { - login = Some(keyword == Keyword::LOGIN); - Ok(()) - } - } - Keyword::INHERIT | Keyword::NOINHERIT => { - if inherit.is_some() { - parser_err!("Found multiple INHERIT or NOINHERIT", loc) - } else { - inherit = Some(keyword == Keyword::INHERIT); - Ok(()) - } - } - Keyword::BYPASSRLS | Keyword::NOBYPASSRLS => { - if bypassrls.is_some() { - parser_err!("Found multiple BYPASSRLS or NOBYPASSRLS", loc) - } else { - bypassrls = Some(keyword == Keyword::BYPASSRLS); 
- Ok(()) - } - } - Keyword::CREATEDB | Keyword::NOCREATEDB => { - if create_db.is_some() { - parser_err!("Found multiple CREATEDB or NOCREATEDB", loc) - } else { - create_db = Some(keyword == Keyword::CREATEDB); - Ok(()) - } - } - Keyword::CREATEROLE | Keyword::NOCREATEROLE => { - if create_role.is_some() { - parser_err!("Found multiple CREATEROLE or NOCREATEROLE", loc) - } else { - create_role = Some(keyword == Keyword::CREATEROLE); - Ok(()) - } - } - Keyword::SUPERUSER | Keyword::NOSUPERUSER => { - if superuser.is_some() { - parser_err!("Found multiple SUPERUSER or NOSUPERUSER", loc) - } else { - superuser = Some(keyword == Keyword::SUPERUSER); - Ok(()) - } - } - Keyword::REPLICATION | Keyword::NOREPLICATION => { - if replication.is_some() { - parser_err!("Found multiple REPLICATION or NOREPLICATION", loc) - } else { - replication = Some(keyword == Keyword::REPLICATION); - Ok(()) - } - } - Keyword::PASSWORD => { - if password.is_some() { - parser_err!("Found multiple PASSWORD", loc) - } else { - password = if self.parse_keyword(Keyword::NULL) { - Some(Password::NullPassword) - } else { - Some(Password::Password(Expr::Value(self.parse_value()?))) - }; - Ok(()) - } - } - Keyword::CONNECTION => { - self.expect_keyword(Keyword::LIMIT)?; - if connection_limit.is_some() { - parser_err!("Found multiple CONNECTION LIMIT", loc) - } else { - connection_limit = Some(Expr::Value(self.parse_number_value()?)); - Ok(()) - } - } - Keyword::VALID => { - self.expect_keyword(Keyword::UNTIL)?; - if valid_until.is_some() { - parser_err!("Found multiple VALID UNTIL", loc) - } else { - valid_until = Some(Expr::Value(self.parse_value()?)); - Ok(()) - } - } - Keyword::IN => { - if self.parse_keyword(Keyword::ROLE) { - if !in_role.is_empty() { - parser_err!("Found multiple IN ROLE", loc) - } else { - in_role = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } else if self.parse_keyword(Keyword::GROUP) { - if !in_group.is_empty() { - parser_err!("Found multiple IN GROUP", loc) - } else { - in_group = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } else { - self.expected("ROLE or GROUP after IN", self.peek_token()) - } - } - Keyword::ROLE => { - if !role.is_empty() { - parser_err!("Found multiple ROLE", loc) - } else { - role = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } - Keyword::USER => { - if !user.is_empty() { - parser_err!("Found multiple USER", loc) - } else { - user = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } - Keyword::ADMIN => { - if !admin.is_empty() { - parser_err!("Found multiple ADMIN", loc) - } else { - admin = self.parse_comma_separated(|p| p.parse_identifier(false))?; - Ok(()) - } - } - _ => break, - }? 
- } - - Ok(Statement::CreateRole { - names, - if_not_exists, - login, - inherit, - bypassrls, - password, - create_db, - create_role, - replication, - superuser, - connection_limit, - valid_until, - in_role, - in_group, - role, - user, - admin, - authorization_owner, - }) - } - - pub fn parse_owner(&mut self) -> Result { - let owner = match self.parse_one_of_keywords(&[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { - Some(Keyword::CURRENT_USER) => Owner::CurrentUser, - Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, - Some(Keyword::SESSION_USER) => Owner::SessionUser, - Some(_) => unreachable!(), - None => { - match self.parse_identifier(false) { - Ok(ident) => Owner::Ident(ident), - Err(e) => { - return Err(ParserError::ParserError(format!("Expected: CURRENT_USER, CURRENT_ROLE, SESSION_USER or identifier after OWNER TO. {e}"))) - } - } - } - }; - Ok(owner) - } - - /// ```sql - /// CREATE POLICY name ON table_name [ AS { PERMISSIVE | RESTRICTIVE } ] - /// [ FOR { ALL | SELECT | INSERT | UPDATE | DELETE } ] - /// [ TO { role_name | PUBLIC | CURRENT_USER | CURRENT_ROLE | SESSION_USER } [, ...] ] - /// [ USING ( using_expression ) ] - /// [ WITH CHECK ( with_check_expression ) ] - /// ``` - /// - /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createpolicy.html) - pub fn parse_create_policy(&mut self) -> Result { - let name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::ON)?; - let table_name = self.parse_object_name(false)?; - - let policy_type = if self.parse_keyword(Keyword::AS) { - let keyword = - self.expect_one_of_keywords(&[Keyword::PERMISSIVE, Keyword::RESTRICTIVE])?; - Some(match keyword { - Keyword::PERMISSIVE => CreatePolicyType::Permissive, - Keyword::RESTRICTIVE => CreatePolicyType::Restrictive, - _ => unreachable!(), - }) - } else { - None - }; - - let command = if self.parse_keyword(Keyword::FOR) { - let keyword = self.expect_one_of_keywords(&[ - Keyword::ALL, - Keyword::SELECT, - Keyword::INSERT, - Keyword::UPDATE, - Keyword::DELETE, - ])?; - Some(match keyword { - Keyword::ALL => CreatePolicyCommand::All, - Keyword::SELECT => CreatePolicyCommand::Select, - Keyword::INSERT => CreatePolicyCommand::Insert, - Keyword::UPDATE => CreatePolicyCommand::Update, - Keyword::DELETE => CreatePolicyCommand::Delete, - _ => unreachable!(), - }) - } else { - None - }; - - let to = if self.parse_keyword(Keyword::TO) { - Some(self.parse_comma_separated(|p| p.parse_owner())?) 
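A sketch of the role-option loop from the outside: options combine freely, and a duplicated option is rejected with the offending location. Names are invented and `Parser::parse_sql` is assumed:

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = PostgreSqlDialect {};

    // Options may appear in any order; each may be given only once.
    let sql = "CREATE ROLE admin WITH LOGIN PASSWORD 'secret' \
               CONNECTION LIMIT 10 VALID UNTIL '2030-01-01'";
    assert!(Parser::parse_sql(&dialect, sql).is_ok());

    // LOGIN and NOLOGIN share one slot, so repeating the pair is an error.
    assert!(Parser::parse_sql(&dialect, "CREATE ROLE r WITH LOGIN NOLOGIN").is_err());
}
```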
- } else { - None - }; - - let using = if self.parse_keyword(Keyword::USING) { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Some(expr) - } else { - None - }; - - let with_check = if self.parse_keywords(&[Keyword::WITH, Keyword::CHECK]) { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Some(expr) - } else { - None - }; - - Ok(CreatePolicy { - name, - table_name, - policy_type, - command, - to, - using, - with_check, - }) - } - - pub fn parse_drop(&mut self) -> Result { - // MySQL dialect supports `TEMPORARY` - let temporary = dialect_of!(self is MySqlDialect | GenericDialect | DuckDbDialect) - && self.parse_keyword(Keyword::TEMPORARY); - let persistent = dialect_of!(self is DuckDbDialect) - && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); - - let object_type = if self.parse_keyword(Keyword::TABLE) { - ObjectType::Table - } else if self.parse_keyword(Keyword::VIEW) { - ObjectType::View - } else if self.parse_keyword(Keyword::INDEX) { - ObjectType::Index - } else if self.parse_keyword(Keyword::ROLE) { - ObjectType::Role - } else if self.parse_keyword(Keyword::SCHEMA) { - ObjectType::Schema - } else if self.parse_keyword(Keyword::DATABASE) { - ObjectType::Database - } else if self.parse_keyword(Keyword::SEQUENCE) { - ObjectType::Sequence - } else if self.parse_keyword(Keyword::STAGE) { - ObjectType::Stage - } else if self.parse_keyword(Keyword::TYPE) { - ObjectType::Type - } else if self.parse_keyword(Keyword::FUNCTION) { - return self.parse_drop_function(); - } else if self.parse_keyword(Keyword::POLICY) { - return self.parse_drop_policy(); - } else if self.parse_keyword(Keyword::PROCEDURE) { - return self.parse_drop_procedure(); - } else if self.parse_keyword(Keyword::SECRET) { - return self.parse_drop_secret(temporary, persistent); - } else if self.parse_keyword(Keyword::TRIGGER) { - return self.parse_drop_trigger(); - } else { - return self.expected( - "TABLE, VIEW, INDEX, ROLE, SCHEMA, DATABASE, FUNCTION, PROCEDURE, STAGE, TRIGGER, SECRET, SEQUENCE, or TYPE after DROP", - self.peek_token(), - ); - }; - // Many dialects support the non-standard `IF EXISTS` clause and allow - // specifying multiple objects to delete in a single statement - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; - - let loc = self.peek_token().span.start; - let cascade = self.parse_keyword(Keyword::CASCADE); - let restrict = self.parse_keyword(Keyword::RESTRICT); - let purge = self.parse_keyword(Keyword::PURGE); - if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in DROP", loc); - } - if object_type == ObjectType::Role && (cascade || restrict || purge) { - return parser_err!( - "Cannot specify CASCADE, RESTRICT, or PURGE in DROP ROLE", - loc - ); - } - Ok(Statement::Drop { - object_type, - if_exists, - names, - cascade, - restrict, - purge, - temporary, - }) - } - - fn parse_optional_referential_action(&mut self) -> Option { - match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { - Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), - Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), - _ => None, - } - } - - /// ```sql - /// DROP FUNCTION [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] 
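All of the optional CREATE POLICY clauses can appear together; an illustrative sketch with invented table, policy and role names:

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // AS, FOR, TO, USING and WITH CHECK are each optional and order-fixed.
    let sql = "CREATE POLICY my_policy ON my_table AS PERMISSIVE FOR SELECT \
               TO my_role, CURRENT_USER USING (c0 = 1) WITH CHECK (true)";
    assert!(Parser::parse_sql(&PostgreSqlDialect {}, sql).is_ok());
}
```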
- /// [ CASCADE | RESTRICT ] - /// ``` - fn parse_drop_function(&mut self) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let func_desc = self.parse_comma_separated(Parser::parse_function_desc)?; - let option = self.parse_optional_referential_action(); - Ok(Statement::DropFunction { - if_exists, - func_desc, - option, - }) - } - - /// ```sql - /// DROP POLICY [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] - /// ``` - /// - /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-droppolicy.html) - fn parse_drop_policy(&mut self) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::ON)?; - let table_name = self.parse_object_name(false)?; - let option = self.parse_optional_referential_action(); - Ok(Statement::DropPolicy { - if_exists, - name, - table_name, - option, - }) - } - - /// ```sql - /// DROP PROCEDURE [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] - /// [ CASCADE | RESTRICT ] - /// ``` - fn parse_drop_procedure(&mut self) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let proc_desc = self.parse_comma_separated(Parser::parse_function_desc)?; - let option = self.parse_optional_referential_action(); - Ok(Statement::DropProcedure { - if_exists, - proc_desc, - option, - }) - } - - fn parse_function_desc(&mut self) -> Result { - let name = self.parse_object_name(false)?; - - let args = if self.consume_token(&Token::LParen) { - if self.consume_token(&Token::RParen) { - None - } else { - let args = self.parse_comma_separated(Parser::parse_function_arg)?; - self.expect_token(&Token::RParen)?; - Some(args) - } - } else { - None - }; - - Ok(FunctionDesc { name, args }) - } - - /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. - fn parse_drop_secret( - &mut self, - temporary: bool, - persistent: bool, - ) -> Result { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let storage_specifier = if self.parse_keyword(Keyword::FROM) { - self.parse_identifier(false).ok() - } else { - None - }; - let temp = match (temporary, persistent) { - (true, false) => Some(true), - (false, true) => Some(false), - (false, false) => None, - _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, - }; - - Ok(Statement::DropSecret { - if_exists, - temporary: temp, - name, - storage_specifier, - }) - } - - /// Parse a `DECLARE` statement. - /// - /// ```sql - /// DECLARE name [ BINARY ] [ ASENSITIVE | INSENSITIVE ] [ [ NO ] SCROLL ] - /// CURSOR [ { WITH | WITHOUT } HOLD ] FOR query - /// ``` - /// - /// The syntax can vary significantly between warehouses. See the grammar - /// on the warehouse specific function in such cases. 
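The function descriptor (a name plus an optional argument list) is shared by DROP FUNCTION and DROP PROCEDURE; a sketch assuming `Parser::parse_sql` (names invented):

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // Argument modes and defaults are accepted inside the descriptor.
    let sql = "DROP FUNCTION IF EXISTS test_func(a INTEGER, IN b INTEGER = 1) CASCADE";
    assert!(Parser::parse_sql(&PostgreSqlDialect {}, sql).is_ok());
}
```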
- pub fn parse_declare(&mut self) -> Result { - if dialect_of!(self is BigQueryDialect) { - return self.parse_big_query_declare(); - } - if dialect_of!(self is SnowflakeDialect) { - return self.parse_snowflake_declare(); - } - if dialect_of!(self is MsSqlDialect) { - return self.parse_mssql_declare(); - } - - let name = self.parse_identifier(false)?; - - let binary = Some(self.parse_keyword(Keyword::BINARY)); - let sensitive = if self.parse_keyword(Keyword::INSENSITIVE) { - Some(true) - } else if self.parse_keyword(Keyword::ASENSITIVE) { - Some(false) - } else { - None - }; - let scroll = if self.parse_keyword(Keyword::SCROLL) { - Some(true) - } else if self.parse_keywords(&[Keyword::NO, Keyword::SCROLL]) { - Some(false) - } else { - None - }; - - self.expect_keyword(Keyword::CURSOR)?; - let declare_type = Some(DeclareType::Cursor); - - let hold = match self.parse_one_of_keywords(&[Keyword::WITH, Keyword::WITHOUT]) { - Some(keyword) => { - self.expect_keyword(Keyword::HOLD)?; - - match keyword { - Keyword::WITH => Some(true), - Keyword::WITHOUT => Some(false), - _ => unreachable!(), - } - } - None => None, - }; - - self.expect_keyword(Keyword::FOR)?; - - let query = Some(self.parse_query()?); - - Ok(Statement::Declare { - stmts: vec![Declare { - names: vec![name], - data_type: None, - assignment: None, - declare_type, - binary, - sensitive, - scroll, - hold, - for_query: query, - }], - }) - } - - /// Parse a [BigQuery] `DECLARE` statement. - /// - /// Syntax: - /// ```text - /// DECLARE variable_name[, ...] [{ | }]; - /// ``` - /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/procedural-language#declare - pub fn parse_big_query_declare(&mut self) -> Result { - let names = self.parse_comma_separated(|parser| Parser::parse_identifier(parser, false))?; - - let data_type = match self.peek_token().token { - Token::Word(w) if w.keyword == Keyword::DEFAULT => None, - _ => Some(self.parse_data_type()?), - }; - - let expr = if data_type.is_some() { - if self.parse_keyword(Keyword::DEFAULT) { - Some(self.parse_expr()?) - } else { - None - } - } else { - // If no variable type - default expression must be specified, per BQ docs. - // i.e `DECLARE foo;` is invalid. - self.expect_keyword(Keyword::DEFAULT)?; - Some(self.parse_expr()?) - }; - - Ok(Statement::Declare { - stmts: vec![Declare { - names, - data_type, - assignment: expr.map(|expr| DeclareAssignment::Default(Box::new(expr))), - declare_type: None, - binary: None, - sensitive: None, - scroll: None, - hold: None, - for_query: None, - }], - }) - } - - /// Parse a [Snowflake] `DECLARE` statement. - /// - /// Syntax: - /// ```text - /// DECLARE - /// [{ - /// | - /// | - /// | }; ... 
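The BigQuery rule that a declaration needs a type, a DEFAULT expression, or both is visible from the public API; a sketch with invented variable names:

```rust
use sqlparser::dialect::BigQueryDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = BigQueryDialect {};

    assert!(Parser::parse_sql(&dialect, "DECLARE x INT64 DEFAULT 42").is_ok());
    assert!(Parser::parse_sql(&dialect, "DECLARE x, y INT64").is_ok());

    // Neither a type nor a DEFAULT: rejected, matching the BigQuery grammar.
    assert!(Parser::parse_sql(&dialect, "DECLARE x").is_err());
}
```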
] - /// - /// - /// [] [ { DEFAULT | := } ] - /// - /// - /// CURSOR FOR - /// - /// - /// RESULTSET [ { DEFAULT | := } ( ) ] ; - /// - /// - /// EXCEPTION [ ( , '' ) ] ; - /// ``` - /// - /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/snowflake-scripting/declare - pub fn parse_snowflake_declare(&mut self) -> Result { - let mut stmts = vec![]; - loop { - let name = self.parse_identifier(false)?; - let (declare_type, for_query, assigned_expr, data_type) = - if self.parse_keyword(Keyword::CURSOR) { - self.expect_keyword(Keyword::FOR)?; - match self.peek_token().token { - Token::Word(w) if w.keyword == Keyword::SELECT => ( - Some(DeclareType::Cursor), - Some(self.parse_query()?), - None, - None, - ), - _ => ( - Some(DeclareType::Cursor), - None, - Some(DeclareAssignment::For(Box::new(self.parse_expr()?))), - None, - ), - } - } else if self.parse_keyword(Keyword::RESULTSET) { - let assigned_expr = if self.peek_token().token != Token::SemiColon { - self.parse_snowflake_variable_declaration_expression()? - } else { - // Nothing more to do. The statement has no further parameters. - None - }; - - (Some(DeclareType::ResultSet), None, assigned_expr, None) - } else if self.parse_keyword(Keyword::EXCEPTION) { - let assigned_expr = if self.peek_token().token == Token::LParen { - Some(DeclareAssignment::Expr(Box::new(self.parse_expr()?))) - } else { - // Nothing more to do. The statement has no further parameters. - None - }; - - (Some(DeclareType::Exception), None, assigned_expr, None) - } else { - // Without an explicit keyword, the only valid option is variable declaration. - let (assigned_expr, data_type) = if let Some(assigned_expr) = - self.parse_snowflake_variable_declaration_expression()? - { - (Some(assigned_expr), None) - } else if let Token::Word(_) = self.peek_token().token { - let data_type = self.parse_data_type()?; - ( - self.parse_snowflake_variable_declaration_expression()?, - Some(data_type), - ) - } else { - (None, None) - }; - (None, None, assigned_expr, data_type) - }; - let stmt = Declare { - names: vec![name], - data_type, - assignment: assigned_expr, - declare_type, - binary: None, - sensitive: None, - scroll: None, - hold: None, - for_query, - }; - - stmts.push(stmt); - if self.consume_token(&Token::SemiColon) { - match self.peek_token().token { - Token::Word(w) - if ALL_KEYWORDS - .binary_search(&w.value.to_uppercase().as_str()) - .is_err() => - { - // Not a keyword - start of a new declaration. - continue; - } - _ => { - // Put back the semicolon, this is the end of the DECLARE statement. - self.prev_token(); - } - } - } - - break; - } - - Ok(Statement::Declare { stmts }) - } - - /// Parse a [MsSql] `DECLARE` statement. - /// - /// Syntax: - /// ```text - /// DECLARE - // { - // { @local_variable [AS] data_type [ = value ] } - // | { @cursor_variable_name CURSOR } - // } [ ,...n ] - /// ``` - /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16 - pub fn parse_mssql_declare(&mut self) -> Result { - let stmts = self.parse_comma_separated(Parser::parse_mssql_declare_stmt)?; - - Ok(Statement::Declare { stmts }) - } - - /// Parse the body of a [MsSql] `DECLARE`statement. 
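A sketch covering all four Snowflake declaration kinds in a single DECLARE block (identifiers invented, `Parser::parse_sql` assumed):

```rust
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::Parser;

fn main() {
    // Variable, cursor, resultset and exception declarations, semicolon-separated.
    let sql = "DECLARE profit DECIMAL(38, 2) DEFAULT 0.0; \
               c1 CURSOR FOR SELECT price FROM invoices; \
               res RESULTSET DEFAULT (SELECT 1); \
               my_exception EXCEPTION (-20002, 'My exception text')";
    let stmts = Parser::parse_sql(&SnowflakeDialect {}, sql).unwrap();
    println!("{stmts:?}");
}
```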
- /// - /// Syntax: - /// ```text - // { - // { @local_variable [AS] data_type [ = value ] } - // | { @cursor_variable_name CURSOR } - // } [ ,...n ] - /// ``` - /// [MsSql]: https://learn.microsoft.com/en-us/sql/t-sql/language-elements/declare-local-variable-transact-sql?view=sql-server-ver16 - pub fn parse_mssql_declare_stmt(&mut self) -> Result { - let name = { - let ident = self.parse_identifier(false)?; - if !ident.value.starts_with('@') { - Err(ParserError::TokenizerError( - "Invalid MsSql variable declaration.".to_string(), - )) - } else { - Ok(ident) - } - }?; - - let (declare_type, data_type) = match self.peek_token().token { - Token::Word(w) => match w.keyword { - Keyword::CURSOR => { - self.next_token(); - (Some(DeclareType::Cursor), None) - } - Keyword::AS => { - self.next_token(); - (None, Some(self.parse_data_type()?)) - } - _ => (None, Some(self.parse_data_type()?)), - }, - _ => (None, Some(self.parse_data_type()?)), - }; - - let assignment = self.parse_mssql_variable_declaration_expression()?; - - Ok(Declare { - names: vec![name], - data_type, - assignment, - declare_type, - binary: None, - sensitive: None, - scroll: None, - hold: None, - for_query: None, - }) - } - - /// Parses the assigned expression in a variable declaration. - /// - /// Syntax: - /// ```text - /// [ { DEFAULT | := } ] - /// ``` - /// - pub fn parse_snowflake_variable_declaration_expression( - &mut self, - ) -> Result, ParserError> { - Ok(match self.peek_token().token { - Token::Word(w) if w.keyword == Keyword::DEFAULT => { - self.next_token(); // Skip `DEFAULT` - Some(DeclareAssignment::Default(Box::new(self.parse_expr()?))) - } - Token::Assignment => { - self.next_token(); // Skip `:=` - Some(DeclareAssignment::DuckAssignment(Box::new( - self.parse_expr()?, - ))) - } - _ => None, - }) - } - - /// Parses the assigned expression in a variable declaration. 
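The MsSql flavor requires the `@` prefix on each variable and allows an optional `AS` before the type; a sketch with invented variable names:

```rust
use sqlparser::dialect::MsSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // Cursor variables take no type; scalar variables may take `= value`.
    let sql = "DECLARE @foo CURSOR, @bar INT = 1, @baz AS VARCHAR(3) = 'abc'";
    let stmts = Parser::parse_sql(&MsSqlDialect {}, sql).unwrap();
    println!("{stmts:?}");
}
```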
- /// - /// Syntax: - /// ```text - /// [ = ] - /// ``` - pub fn parse_mssql_variable_declaration_expression( - &mut self, - ) -> Result, ParserError> { - Ok(match self.peek_token().token { - Token::Eq => { - self.next_token(); // Skip `=` - Some(DeclareAssignment::MsSqlAssignment(Box::new( - self.parse_expr()?, - ))) - } - _ => None, - }) - } - - // FETCH [ direction { FROM | IN } ] cursor INTO target; - pub fn parse_fetch_statement(&mut self) -> Result { - let direction = if self.parse_keyword(Keyword::NEXT) { - FetchDirection::Next - } else if self.parse_keyword(Keyword::PRIOR) { - FetchDirection::Prior - } else if self.parse_keyword(Keyword::FIRST) { - FetchDirection::First - } else if self.parse_keyword(Keyword::LAST) { - FetchDirection::Last - } else if self.parse_keyword(Keyword::ABSOLUTE) { - FetchDirection::Absolute { - limit: self.parse_number_value()?, - } - } else if self.parse_keyword(Keyword::RELATIVE) { - FetchDirection::Relative { - limit: self.parse_number_value()?, - } - } else if self.parse_keyword(Keyword::FORWARD) { - if self.parse_keyword(Keyword::ALL) { - FetchDirection::ForwardAll - } else { - FetchDirection::Forward { - // TODO: Support optional - limit: Some(self.parse_number_value()?), - } - } - } else if self.parse_keyword(Keyword::BACKWARD) { - if self.parse_keyword(Keyword::ALL) { - FetchDirection::BackwardAll - } else { - FetchDirection::Backward { - // TODO: Support optional - limit: Some(self.parse_number_value()?), - } - } - } else if self.parse_keyword(Keyword::ALL) { - FetchDirection::All - } else { - FetchDirection::Count { - limit: self.parse_number_value()?, - } - }; - - self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; - - let name = self.parse_identifier(false)?; - - let into = if self.parse_keyword(Keyword::INTO) { - Some(self.parse_object_name(false)?) - } else { - None - }; - - Ok(Statement::Fetch { - name, - direction, - into, - }) - } - - pub fn parse_discard(&mut self) -> Result { - let object_type = if self.parse_keyword(Keyword::ALL) { - DiscardObject::ALL - } else if self.parse_keyword(Keyword::PLANS) { - DiscardObject::PLANS - } else if self.parse_keyword(Keyword::SEQUENCES) { - DiscardObject::SEQUENCES - } else if self.parse_keyword(Keyword::TEMP) || self.parse_keyword(Keyword::TEMPORARY) { - DiscardObject::TEMP - } else { - return self.expected( - "ALL, PLANS, SEQUENCES, TEMP or TEMPORARY after DISCARD", - self.peek_token(), - ); - }; - Ok(Statement::Discard { object_type }) - } - - pub fn parse_create_index(&mut self, unique: bool) -> Result { - let concurrently = self.parse_keyword(Keyword::CONCURRENTLY); - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let index_name = if if_not_exists || !self.parse_keyword(Keyword::ON) { - let index_name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::ON)?; - Some(index_name) - } else { - None - }; - let table_name = self.parse_object_name(false)?; - let using = if self.parse_keyword(Keyword::USING) { - Some(self.parse_identifier(false)?) 
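Direction, cursor and the optional target parse as described; a sketch (cursor and table names invented):

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    let dialect = PostgreSqlDialect {};

    // Direction, then FROM or IN, then the cursor name and an optional INTO.
    assert!(Parser::parse_sql(&dialect, "FETCH FORWARD 5 FROM c INTO t").is_ok());
    assert!(Parser::parse_sql(&dialect, "FETCH PRIOR FROM c").is_ok());

    // DISCARD takes exactly one of ALL, PLANS, SEQUENCES, TEMP or TEMPORARY.
    assert!(Parser::parse_sql(&dialect, "DISCARD ALL").is_ok());
}
```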
- } else { - None - }; - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_order_by_expr)?; - self.expect_token(&Token::RParen)?; - - let include = if self.parse_keyword(Keyword::INCLUDE) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(|p| p.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - columns - } else { - vec![] - }; - - let nulls_distinct = if self.parse_keyword(Keyword::NULLS) { - let not = self.parse_keyword(Keyword::NOT); - self.expect_keyword(Keyword::DISTINCT)?; - Some(!not) - } else { - None - }; - - let with = if self.dialect.supports_create_index_with_clause() - && self.parse_keyword(Keyword::WITH) - { - self.expect_token(&Token::LParen)?; - let with_params = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - with_params - } else { - Vec::new() - }; - - let predicate = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - - Ok(Statement::CreateIndex(CreateIndex { - name: index_name, - table_name, - using, - columns, - unique, - concurrently, - if_not_exists, - include, - nulls_distinct, - with, - predicate, - })) - } - - pub fn parse_create_extension(&mut self) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - - let (schema, version, cascade) = if self.parse_keyword(Keyword::WITH) { - let schema = if self.parse_keyword(Keyword::SCHEMA) { - Some(self.parse_identifier(false)?) - } else { - None - }; - - let version = if self.parse_keyword(Keyword::VERSION) { - Some(self.parse_identifier(false)?) - } else { - None - }; - - let cascade = self.parse_keyword(Keyword::CASCADE); - - (schema, version, cascade) - } else { - (None, None, false) - }; - - Ok(Statement::CreateExtension { - name, - if_not_exists, - schema, - version, - cascade, - }) - } - - //TODO: Implement parsing for Skewed - pub fn parse_hive_distribution(&mut self) -> Result { - if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(HiveDistributionStyle::PARTITIONED { columns }) - } else { - Ok(HiveDistributionStyle::NONE) - } - } - - pub fn parse_hive_formats(&mut self) -> Result { - let mut hive_format = HiveFormat::default(); - loop { - match self.parse_one_of_keywords(&[ - Keyword::ROW, - Keyword::STORED, - Keyword::LOCATION, - Keyword::WITH, - ]) { - Some(Keyword::ROW) => { - hive_format.row_format = Some(self.parse_row_format()?); - } - Some(Keyword::STORED) => { - self.expect_keyword(Keyword::AS)?; - if self.parse_keyword(Keyword::INPUTFORMAT) { - let input_format = self.parse_expr()?; - self.expect_keyword(Keyword::OUTPUTFORMAT)?; - let output_format = self.parse_expr()?; - hive_format.storage = Some(HiveIOFormat::IOF { - input_format, - output_format, - }); - } else { - let format = self.parse_file_format()?; - hive_format.storage = Some(HiveIOFormat::FileFormat { format }); - } - } - Some(Keyword::LOCATION) => { - hive_format.location = Some(self.parse_literal_string()?); - } - Some(Keyword::WITH) => { - self.prev_token(); - let properties = self - .parse_options_with_keywords(&[Keyword::WITH, Keyword::SERDEPROPERTIES])?; - if !properties.is_empty() { - hive_format.serde_properties = Some(properties); - } else { - break; - } - } - None => break, - _ => break, - } - } - - Ok(hive_format) 
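An index statement that exercises most of the optional clauses above (names invented, `Parser::parse_sql` assumed):

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // Unique index with ordered columns, an INCLUDE list, NULLS NOT DISTINCT
    // and a partial-index predicate.
    let sql = "CREATE UNIQUE INDEX IF NOT EXISTS idx_name ON t USING BTREE (a, b DESC) \
               INCLUDE (c) NULLS NOT DISTINCT WHERE a > 0";
    assert!(Parser::parse_sql(&PostgreSqlDialect {}, sql).is_ok());
}
```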
- } - - pub fn parse_row_format(&mut self) -> Result { - self.expect_keyword(Keyword::FORMAT)?; - match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { - Some(Keyword::SERDE) => { - let class = self.parse_literal_string()?; - Ok(HiveRowFormat::SERDE { class }) - } - _ => { - let mut row_delimiters = vec![]; - - loop { - match self.parse_one_of_keywords(&[ - Keyword::FIELDS, - Keyword::COLLECTION, - Keyword::MAP, - Keyword::LINES, - Keyword::NULL, - ]) { - Some(Keyword::FIELDS) => { - if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::FieldsTerminatedBy, - char: self.parse_identifier(false)?, - }); - - if self.parse_keywords(&[Keyword::ESCAPED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::FieldsEscapedBy, - char: self.parse_identifier(false)?, - }); - } - } else { - break; - } - } - Some(Keyword::COLLECTION) => { - if self.parse_keywords(&[ - Keyword::ITEMS, - Keyword::TERMINATED, - Keyword::BY, - ]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::CollectionItemsTerminatedBy, - char: self.parse_identifier(false)?, - }); - } else { - break; - } - } - Some(Keyword::MAP) => { - if self.parse_keywords(&[ - Keyword::KEYS, - Keyword::TERMINATED, - Keyword::BY, - ]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::MapKeysTerminatedBy, - char: self.parse_identifier(false)?, - }); - } else { - break; - } - } - Some(Keyword::LINES) => { - if self.parse_keywords(&[Keyword::TERMINATED, Keyword::BY]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::LinesTerminatedBy, - char: self.parse_identifier(false)?, - }); - } else { - break; - } - } - Some(Keyword::NULL) => { - if self.parse_keywords(&[Keyword::DEFINED, Keyword::AS]) { - row_delimiters.push(HiveRowDelimiter { - delimiter: HiveDelimiter::NullDefinedAs, - char: self.parse_identifier(false)?, - }); - } else { - break; - } - } - _ => { - break; - } - } - } - - Ok(HiveRowFormat::DELIMITED { - delimiters: row_delimiters, - }) - } - } - } - - fn parse_optional_on_cluster(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::ON, Keyword::CLUSTER]) { - Ok(Some(self.parse_identifier(false)?)) - } else { - Ok(None) - } - } - - pub fn parse_create_table( - &mut self, - or_replace: bool, - temporary: bool, - global: Option, - transient: bool, - ) -> Result { - let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let table_name = self.parse_object_name(allow_unquoted_hyphen)?; - - // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs - let on_cluster = self.parse_optional_on_cluster()?; - - let like = if self.parse_keyword(Keyword::LIKE) || self.parse_keyword(Keyword::ILIKE) { - self.parse_object_name(allow_unquoted_hyphen).ok() - } else { - None - }; - - let clone = if self.parse_keyword(Keyword::CLONE) { - self.parse_object_name(allow_unquoted_hyphen).ok() - } else { - None - }; - - // parse optional column list (schema) - let (columns, constraints) = self.parse_columns()?; - let mut comment = if dialect_of!(self is HiveDialect) - && self.parse_keyword(Keyword::COMMENT) - { - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(str) => Some(CommentDef::AfterColumnDefsWithoutEq(str)), - _ => self.expected("comment", next_token)?, - } - } else { - None - }; - - // SQLite supports `WITHOUT ROWID` 
at the end of `CREATE TABLE` - let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); - - let hive_distribution = self.parse_hive_distribution()?; - let clustered_by = self.parse_optional_clustered_by()?; - let hive_formats = self.parse_hive_formats()?; - // PostgreSQL supports `WITH ( options )`, before `AS` - let with_options = self.parse_options(Keyword::WITH)?; - let table_properties = self.parse_options(Keyword::TBLPROPERTIES)?; - - let engine = if self.parse_keyword(Keyword::ENGINE) { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => { - let name = w.value; - let parameters = if self.peek_token() == Token::LParen { - Some(self.parse_parenthesized_identifiers()?) - } else { - None - }; - Some(TableEngine { name, parameters }) - } - _ => self.expected("identifier", next_token)?, - } - } else { - None - }; - - let auto_increment_offset = if self.parse_keyword(Keyword::AUTO_INCREMENT) { - let _ = self.consume_token(&Token::Eq); - let next_token = self.next_token(); - match next_token.token { - Token::Number(s, _) => Some(Self::parse::(s, next_token.span.start)?), - _ => self.expected("literal int", next_token)?, - } - } else { - None - }; - - // ClickHouse supports `PRIMARY KEY`, before `ORDER BY` - // https://clickhouse.com/docs/en/sql-reference/statements/create/table#primary-key - let primary_key = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) - { - Some(Box::new(self.parse_expr()?)) - } else { - None - }; - - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - if self.consume_token(&Token::LParen) { - let columns = if self.peek_token() != Token::RParen { - self.parse_comma_separated(|p| p.parse_expr())? - } else { - vec![] - }; - self.expect_token(&Token::RParen)?; - Some(OneOrManyWithParens::Many(columns)) - } else { - Some(OneOrManyWithParens::One(self.parse_expr()?)) - } - } else { - None - }; - - let create_table_config = self.parse_optional_create_table_config()?; - - let default_charset = if self.parse_keywords(&[Keyword::DEFAULT, Keyword::CHARSET]) { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => Some(w.value), - _ => self.expected("identifier", next_token)?, - } - } else { - None - }; - - let collation = if self.parse_keywords(&[Keyword::COLLATE]) { - self.expect_token(&Token::Eq)?; - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => Some(w.value), - _ => self.expected("identifier", next_token)?, - } - } else { - None - }; - - let on_commit: Option = - if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DELETE, Keyword::ROWS]) - { - Some(OnCommit::DeleteRows) - } else if self.parse_keywords(&[ - Keyword::ON, - Keyword::COMMIT, - Keyword::PRESERVE, - Keyword::ROWS, - ]) { - Some(OnCommit::PreserveRows) - } else if self.parse_keywords(&[Keyword::ON, Keyword::COMMIT, Keyword::DROP]) { - Some(OnCommit::Drop) - } else { - None - }; - - let strict = self.parse_keyword(Keyword::STRICT); - - // Excludes Hive dialect here since it has been handled after table column definitions. - if !dialect_of!(self is HiveDialect) && self.parse_keyword(Keyword::COMMENT) { - // rewind the COMMENT keyword - self.prev_token(); - comment = self.parse_optional_inline_comment()? - }; - - // Parse optional `AS ( query )` - let query = if self.parse_keyword(Keyword::AS) { - Some(self.parse_query()?) 
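The ENGINE, PRIMARY KEY (before ORDER BY), and ORDER BY branches above are ClickHouse territory. A sketch of the same path from the outside; the type names assume the ClickHouse data types this crate already accepts:

    use sqlparser::dialect::ClickHouseDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // ENGINE=name, PRIMARY KEY <expr>, and ORDER BY (...) are each picked
        // up by the dedicated branches in parse_create_table above.
        let sql = "CREATE TABLE t (x UInt32, ts DATETIME) \
                   ENGINE=MergeTree PRIMARY KEY x ORDER BY (x, ts)";
        let stmts = Parser::parse_sql(&ClickHouseDialect {}, sql).expect("parse failed");
        println!("{}", stmts[0]);
    }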
- } else if self.dialect.supports_create_table_select() && self.parse_keyword(Keyword::SELECT) - { - // rewind the SELECT keyword - self.prev_token(); - Some(self.parse_query()?) - } else { - None - }; - - Ok(CreateTableBuilder::new(table_name) - .temporary(temporary) - .columns(columns) - .constraints(constraints) - .with_options(with_options) - .table_properties(table_properties) - .or_replace(or_replace) - .if_not_exists(if_not_exists) - .transient(transient) - .hive_distribution(hive_distribution) - .hive_formats(Some(hive_formats)) - .global(global) - .query(query) - .without_rowid(without_rowid) - .like(like) - .clone_clause(clone) - .engine(engine) - .comment(comment) - .auto_increment_offset(auto_increment_offset) - .order_by(order_by) - .default_charset(default_charset) - .collation(collation) - .on_commit(on_commit) - .on_cluster(on_cluster) - .clustered_by(clustered_by) - .partition_by(create_table_config.partition_by) - .cluster_by(create_table_config.cluster_by) - .options(create_table_config.options) - .primary_key(primary_key) - .strict(strict) - .build()) - } - - /// Parse configuration like partitioning, clustering information during the table creation. - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) - /// [PostgreSQL](https://www.postgresql.org/docs/current/ddl-partitioning.html) - fn parse_optional_create_table_config( - &mut self, - ) -> Result { - let partition_by = if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) - && self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) - { - Some(Box::new(self.parse_expr()?)) - } else { - None - }; - - let mut cluster_by = None; - let mut options = None; - if dialect_of!(self is BigQueryDialect | GenericDialect) { - if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - cluster_by = Some(WrappedCollection::NoWrapping( - self.parse_comma_separated(|p| p.parse_identifier(false))?, - )); - }; - - if let Token::Word(word) = self.peek_token().token { - if word.keyword == Keyword::OPTIONS { - options = Some(self.parse_options(Keyword::OPTIONS)?); - } - }; - } - - Ok(CreateTableConfiguration { - partition_by, - cluster_by, - options, - }) - } - - pub fn parse_optional_inline_comment(&mut self) -> Result, ParserError> { - let comment = if self.parse_keyword(Keyword::COMMENT) { - let has_eq = self.consume_token(&Token::Eq); - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(str) => Some(if has_eq { - CommentDef::WithEq(str) - } else { - CommentDef::WithoutEq(str) - }), - _ => self.expected("comment", next_token)?, - } - } else { - None - }; - Ok(comment) - } - - pub fn parse_optional_procedure_parameters( - &mut self, - ) -> Result>, ParserError> { - let mut params = vec![]; - if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { - return Ok(Some(params)); - } - loop { - if let Token::Word(_) = self.peek_token().token { - params.push(self.parse_procedure_param()?) 
- } - let comma = self.consume_token(&Token::Comma); - if self.consume_token(&Token::RParen) { - // allow a trailing comma, even though it's not in standard - break; - } else if !comma { - return self.expected("',' or ')' after parameter definition", self.peek_token()); - } - } - Ok(Some(params)) - } - - pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { - let mut columns = vec![]; - let mut constraints = vec![]; - if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { - return Ok((columns, constraints)); - } - - loop { - if let Some(constraint) = self.parse_optional_table_constraint()? { - constraints.push(constraint); - } else if let Token::Word(_) = self.peek_token().token { - columns.push(self.parse_column_def()?); - } else { - return self.expected("column name or constraint definition", self.peek_token()); - } - - let comma = self.consume_token(&Token::Comma); - let rparen = self.peek_token().token == Token::RParen; - - if !comma && !rparen { - return self.expected("',' or ')' after column definition", self.peek_token()); - }; - - if rparen && (!comma || self.options.trailing_commas) { - let _ = self.consume_token(&Token::RParen); - break; - } - } - - Ok((columns, constraints)) - } - - pub fn parse_procedure_param(&mut self) -> Result { - let name = self.parse_identifier(false)?; - let data_type = self.parse_data_type()?; - Ok(ProcedureParam { name, data_type }) - } - - pub fn parse_column_def(&mut self) -> Result { - let name = self.parse_identifier(false)?; - let data_type = if self.is_column_type_sqlite_unspecified() { - DataType::Unspecified - } else { - self.parse_data_type()? - }; - let mut collation = if self.parse_keyword(Keyword::COLLATE) { - Some(self.parse_object_name(false)?) - } else { - None - }; - let mut options = vec![]; - loop { - if self.parse_keyword(Keyword::CONSTRAINT) { - let name = Some(self.parse_identifier(false)?); - if let Some(option) = self.parse_optional_column_option()? { - options.push(ColumnOptionDef { name, option }); - } else { - return self.expected( - "constraint details after CONSTRAINT ", - self.peek_token(), - ); - } - } else if let Some(option) = self.parse_optional_column_option()? { - options.push(ColumnOptionDef { name: None, option }); - } else if dialect_of!(self is MySqlDialect | SnowflakeDialect | GenericDialect) - && self.parse_keyword(Keyword::COLLATE) - { - collation = Some(self.parse_object_name(false)?); - } else { - break; - }; - } - Ok(ColumnDef { - name, - data_type, - collation, - options, - }) - } - - fn is_column_type_sqlite_unspecified(&mut self) -> bool { - if dialect_of!(self is SQLiteDialect) { - match self.peek_token().token { - Token::Word(word) => matches!( - word.keyword, - Keyword::CONSTRAINT - | Keyword::PRIMARY - | Keyword::NOT - | Keyword::UNIQUE - | Keyword::CHECK - | Keyword::DEFAULT - | Keyword::COLLATE - | Keyword::REFERENCES - | Keyword::GENERATED - | Keyword::AS - ), - _ => true, // e.g. comma immediately after column name - } - } else { - false - } - } - - pub fn parse_optional_column_option(&mut self) -> Result, ParserError> { - if let Some(option) = self.dialect.parse_column_option(self)? 
{ - return option; - } - - if self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { - Ok(Some(ColumnOption::CharacterSet( - self.parse_object_name(false)?, - ))) - } else if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { - Ok(Some(ColumnOption::NotNull)) - } else if self.parse_keywords(&[Keyword::COMMENT]) { - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(value, ..) => Ok(Some(ColumnOption::Comment(value))), - _ => self.expected("string", next_token), - } - } else if self.parse_keyword(Keyword::NULL) { - Ok(Some(ColumnOption::Null)) - } else if self.parse_keyword(Keyword::DEFAULT) { - Ok(Some(ColumnOption::Default(self.parse_expr()?))) - } else if dialect_of!(self is ClickHouseDialect| GenericDialect) - && self.parse_keyword(Keyword::MATERIALIZED) - { - Ok(Some(ColumnOption::Materialized(self.parse_expr()?))) - } else if dialect_of!(self is ClickHouseDialect| GenericDialect) - && self.parse_keyword(Keyword::ALIAS) - { - Ok(Some(ColumnOption::Alias(self.parse_expr()?))) - } else if dialect_of!(self is ClickHouseDialect| GenericDialect) - && self.parse_keyword(Keyword::EPHEMERAL) - { - // The expression is optional for the EPHEMERAL syntax, so we need to check - // if the column definition has remaining tokens before parsing the expression. - if matches!(self.peek_token().token, Token::Comma | Token::RParen) { - Ok(Some(ColumnOption::Ephemeral(None))) - } else { - Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) - } - } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { - let characteristics = self.parse_constraint_characteristics()?; - Ok(Some(ColumnOption::Unique { - is_primary: true, - characteristics, - })) - } else if self.parse_keyword(Keyword::UNIQUE) { - let characteristics = self.parse_constraint_characteristics()?; - Ok(Some(ColumnOption::Unique { - is_primary: false, - characteristics, - })) - } else if self.parse_keyword(Keyword::REFERENCES) { - let foreign_table = self.parse_object_name(false)?; - // PostgreSQL allows omitting the column list and - // uses the primary key column of the foreign table by default - let referred_columns = self.parse_parenthesized_column_list(Optional, false)?; - let mut on_delete = None; - let mut on_update = None; - loop { - if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { - on_delete = Some(self.parse_referential_action()?); - } else if on_update.is_none() - && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - { - on_update = Some(self.parse_referential_action()?); - } else { - break; - } - } - let characteristics = self.parse_constraint_characteristics()?; - - Ok(Some(ColumnOption::ForeignKey { - foreign_table, - referred_columns, - on_delete, - on_update, - characteristics, - })) - } else if self.parse_keyword(Keyword::CHECK) { - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Ok(Some(ColumnOption::Check(expr))) - } else if self.parse_keyword(Keyword::AUTO_INCREMENT) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - // Support AUTO_INCREMENT for MySQL - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTO_INCREMENT"), - ]))) - } else if self.parse_keyword(Keyword::AUTOINCREMENT) - && dialect_of!(self is SQLiteDialect | GenericDialect) - { - // Support AUTOINCREMENT for SQLite - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("AUTOINCREMENT"), - ]))) - } else if self.parse_keyword(Keyword::ASC) - && 
self.dialect.supports_asc_desc_in_column_definition() - { - // Support ASC for SQLite - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("ASC"), - ]))) - } else if self.parse_keyword(Keyword::DESC) - && self.dialect.supports_asc_desc_in_column_definition() - { - // Support DESC for SQLite - Ok(Some(ColumnOption::DialectSpecific(vec![ - Token::make_keyword("DESC"), - ]))) - } else if self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - let expr = self.parse_expr()?; - Ok(Some(ColumnOption::OnUpdate(expr))) - } else if self.parse_keyword(Keyword::GENERATED) { - self.parse_optional_column_option_generated() - } else if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.parse_keyword(Keyword::OPTIONS) - { - self.prev_token(); - Ok(Some(ColumnOption::Options( - self.parse_options(Keyword::OPTIONS)?, - ))) - } else if self.parse_keyword(Keyword::AS) - && dialect_of!(self is MySqlDialect | SQLiteDialect | DuckDbDialect | GenericDialect) - { - self.parse_optional_column_option_as() - } else if self.parse_keyword(Keyword::IDENTITY) - && dialect_of!(self is MsSqlDialect | GenericDialect) - { - let parameters = if self.consume_token(&Token::LParen) { - let seed = self.parse_number()?; - self.expect_token(&Token::Comma)?; - let increment = self.parse_number()?; - self.expect_token(&Token::RParen)?; - - Some(IdentityPropertyFormatKind::FunctionCall( - IdentityParameters { seed, increment }, - )) - } else { - None - }; - Ok(Some(ColumnOption::Identity( - IdentityPropertyKind::Identity(IdentityProperty { - parameters, - order: None, - }), - ))) - } else if dialect_of!(self is SQLiteDialect | GenericDialect) - && self.parse_keywords(&[Keyword::ON, Keyword::CONFLICT]) - { - // Support ON CONFLICT for SQLite - Ok(Some(ColumnOption::OnConflict( - self.expect_one_of_keywords(&[ - Keyword::ROLLBACK, - Keyword::ABORT, - Keyword::FAIL, - Keyword::IGNORE, - Keyword::REPLACE, - ])?, - ))) - } else { - Ok(None) - } - } - - pub(crate) fn parse_tag(&mut self) -> Result { - let name = self.parse_identifier(false)?; - self.expect_token(&Token::Eq)?; - let value = self.parse_literal_string()?; - - Ok(Tag::new(name, value)) - } - - fn parse_optional_column_option_generated( - &mut self, - ) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS, Keyword::IDENTITY]) { - let mut sequence_options = vec![]; - if self.expect_token(&Token::LParen).is_ok() { - sequence_options = self.parse_create_sequence_options()?; - self.expect_token(&Token::RParen)?; - } - Ok(Some(ColumnOption::Generated { - generated_as: GeneratedAs::Always, - sequence_options: Some(sequence_options), - generation_expr: None, - generation_expr_mode: None, - generated_keyword: true, - })) - } else if self.parse_keywords(&[ - Keyword::BY, - Keyword::DEFAULT, - Keyword::AS, - Keyword::IDENTITY, - ]) { - let mut sequence_options = vec![]; - if self.expect_token(&Token::LParen).is_ok() { - sequence_options = self.parse_create_sequence_options()?; - self.expect_token(&Token::RParen)?; - } - Ok(Some(ColumnOption::Generated { - generated_as: GeneratedAs::ByDefault, - sequence_options: Some(sequence_options), - generation_expr: None, - generation_expr_mode: None, - generated_keyword: true, - })) - } else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::AS]) { - if self.expect_token(&Token::LParen).is_ok() { - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - let (gen_as, expr_mode) = if self.parse_keywords(&[Keyword::STORED]) 
{ - Ok(( - GeneratedAs::ExpStored, - Some(GeneratedExpressionMode::Stored), - )) - } else if dialect_of!(self is PostgreSqlDialect) { - // Postgres' AS IDENTITY branches are above, this one needs STORED - self.expected("STORED", self.peek_token()) - } else if self.parse_keywords(&[Keyword::VIRTUAL]) { - Ok((GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual))) - } else { - Ok((GeneratedAs::Always, None)) - }?; - - Ok(Some(ColumnOption::Generated { - generated_as: gen_as, - sequence_options: None, - generation_expr: Some(expr), - generation_expr_mode: expr_mode, - generated_keyword: true, - })) - } else { - Ok(None) - } - } else { - Ok(None) - } - } - - fn parse_optional_column_option_as(&mut self) -> Result, ParserError> { - // Some DBs allow 'AS (expr)', shorthand for GENERATED ALWAYS AS - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - - let (gen_as, expr_mode) = if self.parse_keywords(&[Keyword::STORED]) { - ( - GeneratedAs::ExpStored, - Some(GeneratedExpressionMode::Stored), - ) - } else if self.parse_keywords(&[Keyword::VIRTUAL]) { - (GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual)) - } else { - (GeneratedAs::Always, None) - }; - - Ok(Some(ColumnOption::Generated { - generated_as: gen_as, - sequence_options: None, - generation_expr: Some(expr), - generation_expr_mode: expr_mode, - generated_keyword: false, - })) - } - - pub fn parse_optional_clustered_by(&mut self) -> Result, ParserError> { - let clustered_by = if dialect_of!(self is HiveDialect|GenericDialect) - && self.parse_keywords(&[Keyword::CLUSTERED, Keyword::BY]) - { - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - - let sorted_by = if self.parse_keywords(&[Keyword::SORTED, Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let sorted_by_columns = self.parse_comma_separated(|p| p.parse_order_by_expr())?; - self.expect_token(&Token::RParen)?; - Some(sorted_by_columns) - } else { - None - }; - - self.expect_keyword(Keyword::INTO)?; - let num_buckets = self.parse_number_value()?; - self.expect_keyword(Keyword::BUCKETS)?; - Some(ClusteredBy { - columns, - sorted_by, - num_buckets, - }) - } else { - None - }; - Ok(clustered_by) - } - - pub fn parse_referential_action(&mut self) -> Result { - if self.parse_keyword(Keyword::RESTRICT) { - Ok(ReferentialAction::Restrict) - } else if self.parse_keyword(Keyword::CASCADE) { - Ok(ReferentialAction::Cascade) - } else if self.parse_keywords(&[Keyword::SET, Keyword::NULL]) { - Ok(ReferentialAction::SetNull) - } else if self.parse_keywords(&[Keyword::NO, Keyword::ACTION]) { - Ok(ReferentialAction::NoAction) - } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { - Ok(ReferentialAction::SetDefault) - } else { - self.expected( - "one of RESTRICT, CASCADE, SET NULL, NO ACTION or SET DEFAULT", - self.peek_token(), - ) - } - } - - pub fn parse_constraint_characteristics( - &mut self, - ) -> Result, ParserError> { - let mut cc = ConstraintCharacteristics::default(); - - loop { - if cc.deferrable.is_none() && self.parse_keywords(&[Keyword::NOT, Keyword::DEFERRABLE]) - { - cc.deferrable = Some(false); - } else if cc.deferrable.is_none() && self.parse_keyword(Keyword::DEFERRABLE) { - cc.deferrable = Some(true); - } else if cc.initially.is_none() && self.parse_keyword(Keyword::INITIALLY) { - if self.parse_keyword(Keyword::DEFERRED) { - cc.initially = Some(DeferrableInitial::Deferred); - } else if self.parse_keyword(Keyword::IMMEDIATE) { - cc.initially = 
Some(DeferrableInitial::Immediate); - } else { - self.expected("one of DEFERRED or IMMEDIATE", self.peek_token())?; - } - } else if cc.enforced.is_none() && self.parse_keyword(Keyword::ENFORCED) { - cc.enforced = Some(true); - } else if cc.enforced.is_none() - && self.parse_keywords(&[Keyword::NOT, Keyword::ENFORCED]) - { - cc.enforced = Some(false); - } else { - break; - } - } - - if cc.deferrable.is_some() || cc.initially.is_some() || cc.enforced.is_some() { - Ok(Some(cc)) - } else { - Ok(None) - } - } - - pub fn parse_optional_table_constraint( - &mut self, - ) -> Result, ParserError> { - let name = if self.parse_keyword(Keyword::CONSTRAINT) { - Some(self.parse_identifier(false)?) - } else { - None - }; - - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) if w.keyword == Keyword::UNIQUE => { - let index_type_display = self.parse_index_type_display(); - if !dialect_of!(self is GenericDialect | MySqlDialect) - && !index_type_display.is_none() - { - return self - .expected("`index_name` or `(column_name [, ...])`", self.peek_token()); - } - - let nulls_distinct = self.parse_optional_nulls_distinct()?; - - // optional index name - let index_name = self.parse_optional_indent()?; - let index_type = self.parse_optional_using_then_index_type()?; - - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - let index_options = self.parse_index_options()?; - let characteristics = self.parse_constraint_characteristics()?; - Ok(Some(TableConstraint::Unique { - name, - index_name, - index_type_display, - index_type, - columns, - index_options, - characteristics, - nulls_distinct, - })) - } - Token::Word(w) if w.keyword == Keyword::PRIMARY => { - // after `PRIMARY` always stay `KEY` - self.expect_keyword(Keyword::KEY)?; - - // optional index name - let index_name = self.parse_optional_indent()?; - let index_type = self.parse_optional_using_then_index_type()?; - - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - let index_options = self.parse_index_options()?; - let characteristics = self.parse_constraint_characteristics()?; - Ok(Some(TableConstraint::PrimaryKey { - name, - index_name, - index_type, - columns, - index_options, - characteristics, - })) - } - Token::Word(w) if w.keyword == Keyword::FOREIGN => { - self.expect_keyword(Keyword::KEY)?; - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - self.expect_keyword(Keyword::REFERENCES)?; - let foreign_table = self.parse_object_name(false)?; - let referred_columns = self.parse_parenthesized_column_list(Mandatory, false)?; - let mut on_delete = None; - let mut on_update = None; - loop { - if on_delete.is_none() && self.parse_keywords(&[Keyword::ON, Keyword::DELETE]) { - on_delete = Some(self.parse_referential_action()?); - } else if on_update.is_none() - && self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) - { - on_update = Some(self.parse_referential_action()?); - } else { - break; - } - } - - let characteristics = self.parse_constraint_characteristics()?; - - Ok(Some(TableConstraint::ForeignKey { - name, - columns, - foreign_table, - referred_columns, - on_delete, - on_update, - characteristics, - })) - } - Token::Word(w) if w.keyword == Keyword::CHECK => { - self.expect_token(&Token::LParen)?; - let expr = Box::new(self.parse_expr()?); - self.expect_token(&Token::RParen)?; - Ok(Some(TableConstraint::Check { name, expr })) - } - Token::Word(w) - if (w.keyword == Keyword::INDEX || w.keyword == Keyword::KEY) - && dialect_of!(self is GenericDialect | 
MySqlDialect) - && name.is_none() => - { - let display_as_key = w.keyword == Keyword::KEY; - - let name = match self.peek_token().token { - Token::Word(word) if word.keyword == Keyword::USING => None, - _ => self.parse_optional_indent()?, - }; - - let index_type = self.parse_optional_using_then_index_type()?; - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - - Ok(Some(TableConstraint::Index { - display_as_key, - name, - index_type, - columns, - })) - } - Token::Word(w) - if (w.keyword == Keyword::FULLTEXT || w.keyword == Keyword::SPATIAL) - && dialect_of!(self is GenericDialect | MySqlDialect) => - { - if let Some(name) = name { - return self.expected( - "FULLTEXT or SPATIAL option without constraint name", - TokenWithSpan { - token: Token::make_keyword(&name.to_string()), - span: next_token.span, - }, - ); - } - - let fulltext = w.keyword == Keyword::FULLTEXT; - - let index_type_display = self.parse_index_type_display(); - - let opt_index_name = self.parse_optional_indent()?; - - let columns = self.parse_parenthesized_column_list(Mandatory, false)?; - - Ok(Some(TableConstraint::FulltextOrSpatial { - fulltext, - index_type_display, - opt_index_name, - columns, - })) - } - _ => { - if name.is_some() { - self.expected("PRIMARY, UNIQUE, FOREIGN, or CHECK", next_token) - } else { - self.prev_token(); - Ok(None) - } - } - } - } - - fn parse_optional_nulls_distinct(&mut self) -> Result { - Ok(if self.parse_keyword(Keyword::NULLS) { - let not = self.parse_keyword(Keyword::NOT); - self.expect_keyword(Keyword::DISTINCT)?; - if not { - NullsDistinctOption::NotDistinct - } else { - NullsDistinctOption::Distinct - } - } else { - NullsDistinctOption::None - }) - } - - pub fn maybe_parse_options( - &mut self, - keyword: Keyword, - ) -> Result>, ParserError> { - if let Token::Word(word) = self.peek_token().token { - if word.keyword == keyword { - return Ok(Some(self.parse_options(keyword)?)); - } - }; - Ok(None) - } - - pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { - if self.parse_keyword(keyword) { - self.expect_token(&Token::LParen)?; - let options = self.parse_comma_separated(Parser::parse_sql_option)?; - self.expect_token(&Token::RParen)?; - Ok(options) - } else { - Ok(vec![]) - } - } - - pub fn parse_options_with_keywords( - &mut self, - keywords: &[Keyword], - ) -> Result, ParserError> { - if self.parse_keywords(keywords) { - self.expect_token(&Token::LParen)?; - let options = self.parse_comma_separated(Parser::parse_sql_option)?; - self.expect_token(&Token::RParen)?; - Ok(options) - } else { - Ok(vec![]) - } - } - - pub fn parse_index_type(&mut self) -> Result { - if self.parse_keyword(Keyword::BTREE) { - Ok(IndexType::BTree) - } else if self.parse_keyword(Keyword::HASH) { - Ok(IndexType::Hash) - } else { - self.expected("index type {BTREE | HASH}", self.peek_token()) - } - } - - /// Parse [USING {BTREE | HASH}] - pub fn parse_optional_using_then_index_type( - &mut self, - ) -> Result, ParserError> { - if self.parse_keyword(Keyword::USING) { - Ok(Some(self.parse_index_type()?)) - } else { - Ok(None) - } - } - - /// Parse `[ident]`, mostly `ident` is name, like: - /// `window_name`, `index_name`, ... 
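A sketch of the table-constraint arms above using MySQL-flavored DDL, which exercises the PRIMARY KEY, FOREIGN KEY, and FULLTEXT branches; names are illustrative:

    use sqlparser::dialect::MySqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "CREATE TABLE t (
            id INT PRIMARY KEY,
            name TEXT,
            parent_id INT,
            FULLTEXT INDEX ft_name (name),
            FOREIGN KEY (parent_id) REFERENCES t(id) ON DELETE CASCADE
        )";
        let stmts = Parser::parse_sql(&MySqlDialect {}, sql).expect("parse failed");
        println!("{}", stmts[0]);
    }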
- pub fn parse_optional_indent(&mut self) -> Result, ParserError> { - self.maybe_parse(|parser| parser.parse_identifier(false)) - } - - #[must_use] - pub fn parse_index_type_display(&mut self) -> KeyOrIndexDisplay { - if self.parse_keyword(Keyword::KEY) { - KeyOrIndexDisplay::Key - } else if self.parse_keyword(Keyword::INDEX) { - KeyOrIndexDisplay::Index - } else { - KeyOrIndexDisplay::None - } - } - - pub fn parse_optional_index_option(&mut self) -> Result, ParserError> { - if let Some(index_type) = self.parse_optional_using_then_index_type()? { - Ok(Some(IndexOption::Using(index_type))) - } else if self.parse_keyword(Keyword::COMMENT) { - let s = self.parse_literal_string()?; - Ok(Some(IndexOption::Comment(s))) - } else { - Ok(None) - } - } - - pub fn parse_index_options(&mut self) -> Result, ParserError> { - let mut options = Vec::new(); - - loop { - match self.parse_optional_index_option()? { - Some(index_option) => options.push(index_option), - None => return Ok(options), - } - } - } - - pub fn parse_sql_option(&mut self) -> Result { - let is_mssql = dialect_of!(self is MsSqlDialect|GenericDialect); - - match self.peek_token().token { - Token::Word(w) if w.keyword == Keyword::HEAP && is_mssql => { - Ok(SqlOption::Ident(self.parse_identifier(false)?)) - } - Token::Word(w) if w.keyword == Keyword::PARTITION && is_mssql => { - self.parse_option_partition() - } - Token::Word(w) if w.keyword == Keyword::CLUSTERED && is_mssql => { - self.parse_option_clustered() - } - _ => { - let name = self.parse_identifier(false)?; - self.expect_token(&Token::Eq)?; - let value = self.parse_expr()?; - - Ok(SqlOption::KeyValue { key: name, value }) - } - } - } - - pub fn parse_option_clustered(&mut self) -> Result { - if self.parse_keywords(&[ - Keyword::CLUSTERED, - Keyword::COLUMNSTORE, - Keyword::INDEX, - Keyword::ORDER, - ]) { - Ok(SqlOption::Clustered( - TableOptionsClustered::ColumnstoreIndexOrder( - self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, - ), - )) - } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::COLUMNSTORE, Keyword::INDEX]) { - Ok(SqlOption::Clustered( - TableOptionsClustered::ColumnstoreIndex, - )) - } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::INDEX]) { - self.expect_token(&Token::LParen)?; - - let columns = self.parse_comma_separated(|p| { - let name = p.parse_identifier(false)?; - let asc = p.parse_asc_desc(); - - Ok(ClusteredIndex { name, asc }) - })?; - - self.expect_token(&Token::RParen)?; - - Ok(SqlOption::Clustered(TableOptionsClustered::Index(columns))) - } else { - Err(ParserError::ParserError( - "invalid CLUSTERED sequence".to_string(), - )) - } - } - - pub fn parse_option_partition(&mut self) -> Result { - self.expect_keyword(Keyword::PARTITION)?; - self.expect_token(&Token::LParen)?; - let column_name = self.parse_identifier(false)?; - - self.expect_keyword(Keyword::RANGE)?; - let range_direction = if self.parse_keyword(Keyword::LEFT) { - Some(PartitionRangeDirection::Left) - } else if self.parse_keyword(Keyword::RIGHT) { - Some(PartitionRangeDirection::Right) - } else { - None - }; - - self.expect_keywords(&[Keyword::FOR, Keyword::VALUES])?; - self.expect_token(&Token::LParen)?; - - let for_values = self.parse_comma_separated(Parser::parse_expr)?; - - self.expect_token(&Token::RParen)?; - self.expect_token(&Token::RParen)?; - - Ok(SqlOption::Partition { - column_name, - range_direction, - for_values, - }) - } - - pub fn parse_partition(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let partitions = 
self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - Ok(Partition::Partitions(partitions)) - } - - pub fn parse_projection_select(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - self.expect_keyword(Keyword::SELECT)?; - let projection = self.parse_projection()?; - let group_by = self.parse_optional_group_by()?; - let order_by = self.parse_optional_order_by()?; - self.expect_token(&Token::RParen)?; - Ok(ProjectionSelect { - projection, - group_by, - order_by, - }) - } - pub fn parse_alter_table_add_projection(&mut self) -> Result { - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let query = self.parse_projection_select()?; - Ok(AlterTableOperation::AddProjection { - if_not_exists, - name, - select: query, - }) - } - - pub fn parse_alter_table_operation(&mut self) -> Result { - let operation = if self.parse_keyword(Keyword::ADD) { - if let Some(constraint) = self.parse_optional_table_constraint()? { - AlterTableOperation::AddConstraint(constraint) - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::PROJECTION) - { - return self.parse_alter_table_add_projection(); - } else { - let if_not_exists = - self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - let mut new_partitions = vec![]; - loop { - if self.parse_keyword(Keyword::PARTITION) { - new_partitions.push(self.parse_partition()?); - } else { - break; - } - } - if !new_partitions.is_empty() { - AlterTableOperation::AddPartitions { - if_not_exists, - new_partitions, - } - } else { - let column_keyword = self.parse_keyword(Keyword::COLUMN); - - let if_not_exists = if dialect_of!(self is PostgreSqlDialect | BigQueryDialect | DuckDbDialect | GenericDialect) - { - self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]) - || if_not_exists - } else { - false - }; - - let column_def = self.parse_column_def()?; - - let column_position = self.parse_column_position()?; - - AlterTableOperation::AddColumn { - column_keyword, - if_not_exists, - column_def, - column_position, - } - } - } - } else if self.parse_keyword(Keyword::RENAME) { - if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::CONSTRAINT) { - let old_name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::TO)?; - let new_name = self.parse_identifier(false)?; - AlterTableOperation::RenameConstraint { old_name, new_name } - } else if self.parse_keyword(Keyword::TO) { - let table_name = self.parse_object_name(false)?; - AlterTableOperation::RenameTable { table_name } - } else { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_column_name = self.parse_identifier(false)?; - self.expect_keyword(Keyword::TO)?; - let new_column_name = self.parse_identifier(false)?; - AlterTableOperation::RenameColumn { - old_column_name, - new_column_name, - } - } - } else if self.parse_keyword(Keyword::DISABLE) { - if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { - AlterTableOperation::DisableRowLevelSecurity {} - } else if self.parse_keyword(Keyword::RULE) { - let name = self.parse_identifier(false)?; - AlterTableOperation::DisableRule { name } - } else if self.parse_keyword(Keyword::TRIGGER) { - let name = self.parse_identifier(false)?; - AlterTableOperation::DisableTrigger { name } - } else { - return self.expected( - "ROW LEVEL SECURITY, RULE, or TRIGGER after DISABLE", - self.peek_token(), - ); - } - } else if 
self.parse_keyword(Keyword::ENABLE) { - if self.parse_keywords(&[Keyword::ALWAYS, Keyword::RULE]) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableAlwaysRule { name } - } else if self.parse_keywords(&[Keyword::ALWAYS, Keyword::TRIGGER]) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableAlwaysTrigger { name } - } else if self.parse_keywords(&[Keyword::ROW, Keyword::LEVEL, Keyword::SECURITY]) { - AlterTableOperation::EnableRowLevelSecurity {} - } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::RULE]) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableReplicaRule { name } - } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::TRIGGER]) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableReplicaTrigger { name } - } else if self.parse_keyword(Keyword::RULE) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableRule { name } - } else if self.parse_keyword(Keyword::TRIGGER) { - let name = self.parse_identifier(false)?; - AlterTableOperation::EnableTrigger { name } - } else { - return self.expected( - "ALWAYS, REPLICA, ROW LEVEL SECURITY, RULE, or TRIGGER after ENABLE", - self.peek_token(), - ); - } - } else if self.parse_keywords(&[Keyword::CLEAR, Keyword::PROJECTION]) - && dialect_of!(self is ClickHouseDialect|GenericDialect) - { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { - Some(self.parse_identifier(false)?) - } else { - None - }; - AlterTableOperation::ClearProjection { - if_exists, - name, - partition, - } - } else if self.parse_keywords(&[Keyword::MATERIALIZE, Keyword::PROJECTION]) - && dialect_of!(self is ClickHouseDialect|GenericDialect) - { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let partition = if self.parse_keywords(&[Keyword::IN, Keyword::PARTITION]) { - Some(self.parse_identifier(false)?) 
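The ENABLE/DISABLE arms map one-to-one onto PostgreSQL trigger, rule, and row level security toggles; a quick sketch against the public API:

    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        for sql in [
            "ALTER TABLE t ENABLE ROW LEVEL SECURITY",
            "ALTER TABLE t DISABLE TRIGGER my_trigger",
            "ALTER TABLE t ENABLE REPLICA RULE my_rule",
        ] {
            // Each statement lands in a different AlterTableOperation variant.
            let stmts = Parser::parse_sql(&PostgreSqlDialect {}, sql).expect("parse failed");
            println!("{}", stmts[0]);
        }
    }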
- } else { - None - }; - AlterTableOperation::MaterializeProjection { - if_exists, - name, - partition, - } - } else if self.parse_keyword(Keyword::DROP) { - if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: true, - } - } else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::DropPartitions { - partitions, - if_exists: false, - } - } else if self.parse_keyword(Keyword::CONSTRAINT) { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropConstraint { - if_exists, - name, - cascade, - } - } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - AlterTableOperation::DropPrimaryKey - } else if self.parse_keyword(Keyword::PROJECTION) - && dialect_of!(self is ClickHouseDialect|GenericDialect) - { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let name = self.parse_identifier(false)?; - AlterTableOperation::DropProjection { if_exists, name } - } else if self.parse_keywords(&[Keyword::CLUSTERING, Keyword::KEY]) { - AlterTableOperation::DropClusteringKey - } else { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let column_name = self.parse_identifier(false)?; - let cascade = self.parse_keyword(Keyword::CASCADE); - AlterTableOperation::DropColumn { - column_name, - if_exists, - cascade, - } - } - } else if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let before = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - self.expect_keyword(Keyword::RENAME)?; - self.expect_keywords(&[Keyword::TO, Keyword::PARTITION])?; - self.expect_token(&Token::LParen)?; - let renames = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::RenamePartitions { - old_partitions: before, - new_partitions: renames, - } - } else if self.parse_keyword(Keyword::CHANGE) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let old_name = self.parse_identifier(false)?; - let new_name = self.parse_identifier(false)?; - let data_type = self.parse_data_type()?; - let mut options = vec![]; - while let Some(option) = self.parse_optional_column_option()? { - options.push(option); - } - - let column_position = self.parse_column_position()?; - - AlterTableOperation::ChangeColumn { - old_name, - new_name, - data_type, - options, - column_position, - } - } else if self.parse_keyword(Keyword::MODIFY) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let col_name = self.parse_identifier(false)?; - let data_type = self.parse_data_type()?; - let mut options = vec![]; - while let Some(option) = self.parse_optional_column_option()? 
{ - options.push(option); - } - - let column_position = self.parse_column_position()?; - - AlterTableOperation::ModifyColumn { - col_name, - data_type, - options, - column_position, - } - } else if self.parse_keyword(Keyword::ALTER) { - let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] - let column_name = self.parse_identifier(false)?; - let is_postgresql = dialect_of!(self is PostgreSqlDialect); - - let op: AlterColumnOperation = if self.parse_keywords(&[ - Keyword::SET, - Keyword::NOT, - Keyword::NULL, - ]) { - AlterColumnOperation::SetNotNull {} - } else if self.parse_keywords(&[Keyword::DROP, Keyword::NOT, Keyword::NULL]) { - AlterColumnOperation::DropNotNull {} - } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { - AlterColumnOperation::SetDefault { - value: self.parse_expr()?, - } - } else if self.parse_keywords(&[Keyword::DROP, Keyword::DEFAULT]) { - AlterColumnOperation::DropDefault {} - } else if self.parse_keywords(&[Keyword::SET, Keyword::DATA, Keyword::TYPE]) - || (is_postgresql && self.parse_keyword(Keyword::TYPE)) - { - let data_type = self.parse_data_type()?; - let using = if is_postgresql && self.parse_keyword(Keyword::USING) { - Some(self.parse_expr()?) - } else { - None - }; - AlterColumnOperation::SetDataType { data_type, using } - } else if self.parse_keywords(&[Keyword::ADD, Keyword::GENERATED]) { - let generated_as = if self.parse_keyword(Keyword::ALWAYS) { - Some(GeneratedAs::Always) - } else if self.parse_keywords(&[Keyword::BY, Keyword::DEFAULT]) { - Some(GeneratedAs::ByDefault) - } else { - None - }; - - self.expect_keywords(&[Keyword::AS, Keyword::IDENTITY])?; - - let mut sequence_options: Option> = None; - - if self.peek_token().token == Token::LParen { - self.expect_token(&Token::LParen)?; - sequence_options = Some(self.parse_create_sequence_options()?); - self.expect_token(&Token::RParen)?; - } - - AlterColumnOperation::AddGenerated { - generated_as, - sequence_options, - } - } else { - let message = if is_postgresql { - "SET/DROP NOT NULL, SET DEFAULT, SET DATA TYPE, or ADD GENERATED after ALTER COLUMN" - } else { - "SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN" - }; - - return self.expected(message, self.peek_token()); - }; - AlterTableOperation::AlterColumn { column_name, op } - } else if self.parse_keyword(Keyword::SWAP) { - self.expect_keyword(Keyword::WITH)?; - let table_name = self.parse_object_name(false)?; - AlterTableOperation::SwapWith { table_name } - } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) - && self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) - { - let new_owner = self.parse_owner()?; - AlterTableOperation::OwnerTo { new_owner } - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::ATTACH) - { - AlterTableOperation::AttachPartition { - partition: self.parse_part_or_partition()?, - } - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::DETACH) - { - AlterTableOperation::DetachPartition { - partition: self.parse_part_or_partition()?, - } - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::FREEZE) - { - let partition = self.parse_part_or_partition()?; - let with_name = if self.parse_keyword(Keyword::WITH) { - self.expect_keyword(Keyword::NAME)?; - Some(self.parse_identifier(false)?) 
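A sketch of the CHANGE/MODIFY/ALTER COLUMN arms above: the MySQL statement exercises column repositioning (FIRST), the PostgreSQL one the `TYPE ... USING` branch. Table and column names are illustrative:

    use sqlparser::dialect::{MySqlDialect, PostgreSqlDialect};
    use sqlparser::parser::Parser;

    fn main() {
        // MySQL: CHANGE [COLUMN] old new <type> <options> [FIRST | AFTER col]
        let mysql = "ALTER TABLE t CHANGE COLUMN old_c new_c BIGINT NOT NULL FIRST";
        println!("{}", Parser::parse_sql(&MySqlDialect {}, mysql).expect("parse failed")[0]);

        // PostgreSQL: ALTER COLUMN c TYPE <type> [USING <expr>]
        let pg = "ALTER TABLE t ALTER COLUMN c TYPE TEXT USING c::TEXT";
        println!("{}", Parser::parse_sql(&PostgreSqlDialect {}, pg).expect("parse failed")[0]);
    }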
- } else { - None - }; - AlterTableOperation::FreezePartition { - partition, - with_name, - } - } else if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::UNFREEZE) - { - let partition = self.parse_part_or_partition()?; - let with_name = if self.parse_keyword(Keyword::WITH) { - self.expect_keyword(Keyword::NAME)?; - Some(self.parse_identifier(false)?) - } else { - None - }; - AlterTableOperation::UnfreezePartition { - partition, - with_name, - } - } else if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - self.expect_token(&Token::LParen)?; - let exprs = self.parse_comma_separated(|parser| parser.parse_expr())?; - self.expect_token(&Token::RParen)?; - AlterTableOperation::ClusterBy { exprs } - } else if self.parse_keywords(&[Keyword::SUSPEND, Keyword::RECLUSTER]) { - AlterTableOperation::SuspendRecluster - } else if self.parse_keywords(&[Keyword::RESUME, Keyword::RECLUSTER]) { - AlterTableOperation::ResumeRecluster - } else { - let options: Vec = - self.parse_options_with_keywords(&[Keyword::SET, Keyword::TBLPROPERTIES])?; - if !options.is_empty() { - AlterTableOperation::SetTblProperties { - table_properties: options, - } - } else { - return self.expected( - "ADD, RENAME, PARTITION, SWAP, DROP, or SET TBLPROPERTIES after ALTER TABLE", - self.peek_token(), - ); - } - }; - Ok(operation) - } - - fn parse_part_or_partition(&mut self) -> Result { - let keyword = self.expect_one_of_keywords(&[Keyword::PART, Keyword::PARTITION])?; - match keyword { - Keyword::PART => Ok(Partition::Part(self.parse_expr()?)), - Keyword::PARTITION => Ok(Partition::Expr(self.parse_expr()?)), - // unreachable because expect_one_of_keywords used above - _ => unreachable!(), - } - } - - pub fn parse_alter(&mut self) -> Result { - let object_type = self.expect_one_of_keywords(&[ - Keyword::VIEW, - Keyword::TABLE, - Keyword::INDEX, - Keyword::ROLE, - Keyword::POLICY, - ])?; - match object_type { - Keyword::VIEW => self.parse_alter_view(), - Keyword::TABLE => { - let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let only = self.parse_keyword(Keyword::ONLY); // [ ONLY ] - let table_name = self.parse_object_name(false)?; - let on_cluster = self.parse_optional_on_cluster()?; - let operations = self.parse_comma_separated(Parser::parse_alter_table_operation)?; - - let mut location = None; - if self.parse_keyword(Keyword::LOCATION) { - location = Some(HiveSetLocation { - has_set: false, - location: self.parse_identifier(false)?, - }); - } else if self.parse_keywords(&[Keyword::SET, Keyword::LOCATION]) { - location = Some(HiveSetLocation { - has_set: true, - location: self.parse_identifier(false)?, - }); - } - - Ok(Statement::AlterTable { - name: table_name, - if_exists, - only, - operations, - location, - on_cluster, - }) - } - Keyword::INDEX => { - let index_name = self.parse_object_name(false)?; - let operation = if self.parse_keyword(Keyword::RENAME) { - if self.parse_keyword(Keyword::TO) { - let index_name = self.parse_object_name(false)?; - AlterIndexOperation::RenameIndex { index_name } - } else { - return self.expected("TO after RENAME", self.peek_token()); - } - } else { - return self.expected("RENAME after ALTER INDEX", self.peek_token()); - }; - - Ok(Statement::AlterIndex { - name: index_name, - operation, - }) - } - Keyword::ROLE => self.parse_alter_role(), - Keyword::POLICY => self.parse_alter_policy(), - // unreachable because expect_one_of_keywords used above - _ => unreachable!(), - } - } - - pub fn parse_alter_view(&mut self) -> Result { - let 
name = self.parse_object_name(false)?; - let columns = self.parse_parenthesized_column_list(Optional, false)?; - - let with_options = self.parse_options(Keyword::WITH)?; - - self.expect_keyword(Keyword::AS)?; - let query = self.parse_query()?; - - Ok(Statement::AlterView { - name, - columns, - query, - with_options, - }) - } - - /// Parse a `CALL procedure_name(arg1, arg2, ...)` - /// or `CALL procedure_name` statement - pub fn parse_call(&mut self) -> Result { - let object_name = self.parse_object_name(false)?; - if self.peek_token().token == Token::LParen { - match self.parse_function(object_name)? { - Expr::Function(f) => Ok(Statement::Call(f)), - other => parser_err!( - format!("Expected a simple procedure call but found: {other}"), - self.peek_token().span.start - ), - } - } else { - Ok(Statement::Call(Function { - name: object_name, - parameters: FunctionArguments::None, - args: FunctionArguments::None, - over: None, - filter: None, - null_treatment: None, - within_group: vec![], - })) - } - } - - /// Parse a copy statement - pub fn parse_copy(&mut self) -> Result { - let source; - if self.consume_token(&Token::LParen) { - source = CopySource::Query(self.parse_query()?); - self.expect_token(&Token::RParen)?; - } else { - let table_name = self.parse_object_name(false)?; - let columns = self.parse_parenthesized_column_list(Optional, false)?; - source = CopySource::Table { - table_name, - columns, - }; - } - let to = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::TO]) { - Some(Keyword::FROM) => false, - Some(Keyword::TO) => true, - _ => self.expected("FROM or TO", self.peek_token())?, - }; - if !to { - // Use a separate if statement to prevent Rust compiler from complaining about - // "if statement in this position is unstable: https://github.com/rust-lang/rust/issues/53667" - if let CopySource::Query(_) = source { - return Err(ParserError::ParserError( - "COPY ... FROM does not support query as a source".to_string(), - )); - } - } - let target = if self.parse_keyword(Keyword::STDIN) { - CopyTarget::Stdin - } else if self.parse_keyword(Keyword::STDOUT) { - CopyTarget::Stdout - } else if self.parse_keyword(Keyword::PROGRAM) { - CopyTarget::Program { - command: self.parse_literal_string()?, - } - } else { - CopyTarget::File { - filename: self.parse_literal_string()?, - } - }; - let _ = self.parse_keyword(Keyword::WITH); // [ WITH ] - let mut options = vec![]; - if self.consume_token(&Token::LParen) { - options = self.parse_comma_separated(Parser::parse_copy_option)?; - self.expect_token(&Token::RParen)?; - } - let mut legacy_options = vec![]; - while let Some(opt) = self.maybe_parse(|parser| parser.parse_copy_legacy_option())? 
{ - legacy_options.push(opt); - } - let values = if let CopyTarget::Stdin = target { - self.expect_token(&Token::SemiColon)?; - self.parse_tsv() - } else { - vec![] - }; - Ok(Statement::Copy { - source, - to, - target, - options, - legacy_options, - values, - }) - } - - pub fn parse_close(&mut self) -> Result { - let cursor = if self.parse_keyword(Keyword::ALL) { - CloseCursor::All - } else { - let name = self.parse_identifier(false)?; - - CloseCursor::Specific { name } - }; - - Ok(Statement::Close { cursor }) - } - - fn parse_copy_option(&mut self) -> Result { - let ret = match self.parse_one_of_keywords(&[ - Keyword::FORMAT, - Keyword::FREEZE, - Keyword::DELIMITER, - Keyword::NULL, - Keyword::HEADER, - Keyword::QUOTE, - Keyword::ESCAPE, - Keyword::FORCE_QUOTE, - Keyword::FORCE_NOT_NULL, - Keyword::FORCE_NULL, - Keyword::ENCODING, - ]) { - Some(Keyword::FORMAT) => CopyOption::Format(self.parse_identifier(false)?), - Some(Keyword::FREEZE) => CopyOption::Freeze(!matches!( - self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]), - Some(Keyword::FALSE) - )), - Some(Keyword::DELIMITER) => CopyOption::Delimiter(self.parse_literal_char()?), - Some(Keyword::NULL) => CopyOption::Null(self.parse_literal_string()?), - Some(Keyword::HEADER) => CopyOption::Header(!matches!( - self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]), - Some(Keyword::FALSE) - )), - Some(Keyword::QUOTE) => CopyOption::Quote(self.parse_literal_char()?), - Some(Keyword::ESCAPE) => CopyOption::Escape(self.parse_literal_char()?), - Some(Keyword::FORCE_QUOTE) => { - CopyOption::ForceQuote(self.parse_parenthesized_column_list(Mandatory, false)?) - } - Some(Keyword::FORCE_NOT_NULL) => { - CopyOption::ForceNotNull(self.parse_parenthesized_column_list(Mandatory, false)?) - } - Some(Keyword::FORCE_NULL) => { - CopyOption::ForceNull(self.parse_parenthesized_column_list(Mandatory, false)?) - } - Some(Keyword::ENCODING) => CopyOption::Encoding(self.parse_literal_string()?), - _ => self.expected("option", self.peek_token())?, - }; - Ok(ret) - } - - fn parse_copy_legacy_option(&mut self) -> Result { - let ret = match self.parse_one_of_keywords(&[ - Keyword::BINARY, - Keyword::DELIMITER, - Keyword::NULL, - Keyword::CSV, - ]) { - Some(Keyword::BINARY) => CopyLegacyOption::Binary, - Some(Keyword::DELIMITER) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyOption::Delimiter(self.parse_literal_char()?) - } - Some(Keyword::NULL) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyOption::Null(self.parse_literal_string()?) - } - Some(Keyword::CSV) => CopyLegacyOption::Csv({ - let mut opts = vec![]; - while let Some(opt) = - self.maybe_parse(|parser| parser.parse_copy_legacy_csv_option())? - { - opts.push(opt); - } - opts - }), - _ => self.expected("option", self.peek_token())?, - }; - Ok(ret) - } - - fn parse_copy_legacy_csv_option(&mut self) -> Result { - let ret = match self.parse_one_of_keywords(&[ - Keyword::HEADER, - Keyword::QUOTE, - Keyword::ESCAPE, - Keyword::FORCE, - ]) { - Some(Keyword::HEADER) => CopyLegacyCsvOption::Header, - Some(Keyword::QUOTE) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyCsvOption::Quote(self.parse_literal_char()?) - } - Some(Keyword::ESCAPE) => { - let _ = self.parse_keyword(Keyword::AS); // [ AS ] - CopyLegacyCsvOption::Escape(self.parse_literal_char()?) 
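A sketch of parse_copy from the outside; note the early return above that rejects a query source for `COPY ... FROM`:

    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // A query source is fine when copying TO a target.
        let ok = "COPY (SELECT a, b FROM t) TO STDOUT WITH (FORMAT csv, HEADER true)";
        println!("{}", Parser::parse_sql(&PostgreSqlDialect {}, ok).expect("parse failed")[0]);

        // ...but rejected for FROM, per the check in parse_copy above.
        let err = Parser::parse_sql(&PostgreSqlDialect {}, "COPY (SELECT 1) FROM STDIN");
        assert!(err.is_err());
    }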
- } - Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::NOT, Keyword::NULL]) => { - CopyLegacyCsvOption::ForceNotNull( - self.parse_comma_separated(|p| p.parse_identifier(false))?, - ) - } - Some(Keyword::FORCE) if self.parse_keywords(&[Keyword::QUOTE]) => { - CopyLegacyCsvOption::ForceQuote( - self.parse_comma_separated(|p| p.parse_identifier(false))?, - ) - } - _ => self.expected("csv option", self.peek_token())?, - }; - Ok(ret) - } - - fn parse_literal_char(&mut self) -> Result { - let s = self.parse_literal_string()?; - if s.len() != 1 { - let loc = self - .tokens - .get(self.index - 1) - .map_or(Location { line: 0, column: 0 }, |t| t.span.start); - return parser_err!(format!("Expect a char, found {s:?}"), loc); - } - Ok(s.chars().next().unwrap()) - } - - /// Parse a tab separated values in - /// COPY payload - pub fn parse_tsv(&mut self) -> Vec> { - self.parse_tab_value() - } - - pub fn parse_tab_value(&mut self) -> Vec> { - let mut values = vec![]; - let mut content = String::from(""); - while let Some(t) = self.next_token_no_skip().map(|t| &t.token) { - match t { - Token::Whitespace(Whitespace::Tab) => { - values.push(Some(content.to_string())); - content.clear(); - } - Token::Whitespace(Whitespace::Newline) => { - values.push(Some(content.to_string())); - content.clear(); - } - Token::Backslash => { - if self.consume_token(&Token::Period) { - return values; - } - if let Token::Word(w) = self.next_token().token { - if w.value == "N" { - values.push(None); - } - } - } - _ => { - content.push_str(&t.to_string()); - } - } - } - values - } - - /// Parse a literal value (numbers, strings, date/time, booleans) - pub fn parse_value(&mut self) -> Result { - let next_token = self.next_token(); - let span = next_token.span; - match next_token.token { - Token::Word(w) => match w.keyword { - Keyword::TRUE if self.dialect.supports_boolean_literals() => { - Ok(Value::Boolean(true)) - } - Keyword::FALSE if self.dialect.supports_boolean_literals() => { - Ok(Value::Boolean(false)) - } - Keyword::NULL => Ok(Value::Null), - Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style { - Some('"') => Ok(Value::DoubleQuotedString(w.value)), - Some('\'') => Ok(Value::SingleQuotedString(w.value)), - _ => self.expected( - "A value?", - TokenWithSpan { - token: Token::Word(w), - span, - }, - )?, - }, - _ => self.expected( - "a concrete value", - TokenWithSpan { - token: Token::Word(w), - span, - }, - ), - }, - // The call to n.parse() returns a bigdecimal when the - // bigdecimal feature is enabled, and is otherwise a no-op - // (i.e., it returns the input string). 
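parse_value is easiest to poke at through parse_expr, which is also public; a minimal sketch, with the `Expr::Value` wrapping as shown in the match arms below:

    use sqlparser::ast::{Expr, Value};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::{Parser, ParserError};

    fn main() -> Result<(), ParserError> {
        let dialect = GenericDialect {};
        // try_with_sql tokenizes the input; parse_expr consumes one expression.
        let expr = Parser::new(&dialect).try_with_sql("1.5")?.parse_expr()?;
        assert!(matches!(expr, Expr::Value(Value::Number(_, _))));

        let expr = Parser::new(&dialect).try_with_sql("'hello'")?.parse_expr()?;
        assert!(matches!(expr, Expr::Value(Value::SingleQuotedString(_))));
        Ok(())
    }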
-            Token::Number(n, l) => Ok(Value::Number(Self::parse(n, span.start)?, l)),
-            Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
-            Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
-            Token::TripleSingleQuotedString(ref s) => {
-                Ok(Value::TripleSingleQuotedString(s.to_string()))
-            }
-            Token::TripleDoubleQuotedString(ref s) => {
-                Ok(Value::TripleDoubleQuotedString(s.to_string()))
-            }
-            Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())),
-            Token::SingleQuotedByteStringLiteral(ref s) => {
-                Ok(Value::SingleQuotedByteStringLiteral(s.clone()))
-            }
-            Token::DoubleQuotedByteStringLiteral(ref s) => {
-                Ok(Value::DoubleQuotedByteStringLiteral(s.clone()))
-            }
-            Token::TripleSingleQuotedByteStringLiteral(ref s) => {
-                Ok(Value::TripleSingleQuotedByteStringLiteral(s.clone()))
-            }
-            Token::TripleDoubleQuotedByteStringLiteral(ref s) => {
-                Ok(Value::TripleDoubleQuotedByteStringLiteral(s.clone()))
-            }
-            Token::SingleQuotedRawStringLiteral(ref s) => {
-                Ok(Value::SingleQuotedRawStringLiteral(s.clone()))
-            }
-            Token::DoubleQuotedRawStringLiteral(ref s) => {
-                Ok(Value::DoubleQuotedRawStringLiteral(s.clone()))
-            }
-            Token::TripleSingleQuotedRawStringLiteral(ref s) => {
-                Ok(Value::TripleSingleQuotedRawStringLiteral(s.clone()))
-            }
-            Token::TripleDoubleQuotedRawStringLiteral(ref s) => {
-                Ok(Value::TripleDoubleQuotedRawStringLiteral(s.clone()))
-            }
-            Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
-            Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
-            Token::UnicodeStringLiteral(ref s) => Ok(Value::UnicodeStringLiteral(s.to_string())),
-            Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
-            Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())),
-            tok @ Token::Colon | tok @ Token::AtSign => {
-                // Not calling self.parse_identifier(false)? because only in placeholders
-                // do we want to accept numbers as identifiers. This is because Snowflake
-                // allows numbers as placeholders.
-                let next_token = self.next_token();
-                let ident = match next_token.token {
-                    Token::Word(w) => Ok(w.to_ident(next_token.span)),
-                    Token::Number(w, false) => Ok(Ident::new(w)),
-                    _ => self.expected("placeholder", next_token),
-                }?;
-                let placeholder = tok.to_string() + &ident.value;
-                Ok(Value::Placeholder(placeholder))
-            }
-            unexpected => self.expected(
-                "a value",
-                TokenWithSpan {
-                    token: unexpected,
-                    span,
-                },
-            ),
-        }
-    }
-
-    /// Parse an unsigned numeric literal
-    pub fn parse_number_value(&mut self) -> Result<Value, ParserError> {
-        match self.parse_value()? {
-            v @ Value::Number(_, _) => Ok(v),
-            v @ Value::Placeholder(_) => Ok(v),
-            _ => {
-                self.prev_token();
-                self.expected("literal number", self.peek_token())
-            }
-        }
-    }
-
-    /// Parse a numeric literal as an expression.
Returns a [`Expr::UnaryOp`] if the number is signed, - /// otherwise returns a [`Expr::Value`] - pub fn parse_number(&mut self) -> Result { - let next_token = self.next_token(); - match next_token.token { - Token::Plus => Ok(Expr::UnaryOp { - op: UnaryOperator::Plus, - expr: Box::new(Expr::Value(self.parse_number_value()?)), - }), - Token::Minus => Ok(Expr::UnaryOp { - op: UnaryOperator::Minus, - expr: Box::new(Expr::Value(self.parse_number_value()?)), - }), - _ => { - self.prev_token(); - Ok(Expr::Value(self.parse_number_value()?)) - } - } - } - - fn parse_introduced_string_value(&mut self) -> Result { - let next_token = self.next_token(); - let span = next_token.span; - match next_token.token { - Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), - Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), - Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), - unexpected => self.expected( - "a string value", - TokenWithSpan { - token: unexpected, - span, - }, - ), - } - } - - /// Parse an unsigned literal integer/long - pub fn parse_literal_uint(&mut self) -> Result { - let next_token = self.next_token(); - match next_token.token { - Token::Number(s, _) => Self::parse::(s, next_token.span.start), - _ => self.expected("literal int", next_token), - } - } - - /// Parse the body of a `CREATE FUNCTION` specified as a string. - /// e.g. `CREATE FUNCTION ... AS $$ body $$`. - fn parse_create_function_body_string(&mut self) -> Result { - let peek_token = self.peek_token(); - match peek_token.token { - Token::DollarQuotedString(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => - { - self.next_token(); - Ok(Expr::Value(Value::DollarQuotedString(s))) - } - _ => Ok(Expr::Value(Value::SingleQuotedString( - self.parse_literal_string()?, - ))), - } - } - - /// Parse a literal string - pub fn parse_literal_string(&mut self) -> Result { - let next_token = self.next_token(); - match next_token.token { - Token::Word(Word { - value, - keyword: Keyword::NoKeyword, - .. 
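parse_number is also public; a sketch of the signed-literal behavior described above, assuming the GenericDialect:

    use sqlparser::ast::{Expr, UnaryOperator};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let dialect = GenericDialect {};
        let mut p = Parser::new(&dialect).try_with_sql("-42").unwrap();
        match p.parse_number().unwrap() {
            // A leading minus becomes a UnaryOp wrapping the unsigned literal.
            Expr::UnaryOp { op, expr } => {
                assert_eq!(op, UnaryOperator::Minus);
                println!("negated literal: {expr}");
            }
            other => println!("unsigned literal: {other}"),
        }
    }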
- }) => Ok(value), - Token::SingleQuotedString(s) => Ok(s), - Token::DoubleQuotedString(s) => Ok(s), - Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { - Ok(s) - } - Token::UnicodeStringLiteral(s) => Ok(s), - _ => self.expected("literal string", next_token), - } - } - - pub fn parse_enum_values(&mut self) -> Result, ParserError> { - self.expect_token(&Token::LParen)?; - let values = self.parse_comma_separated(|parser| { - let name = parser.parse_literal_string()?; - let e = if parser.consume_token(&Token::Eq) { - let value = parser.parse_number()?; - EnumMember::NamedValue(name, value) - } else { - EnumMember::Name(name) - }; - Ok(e) - })?; - self.expect_token(&Token::RParen)?; - - Ok(values) - } - - /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) - pub fn parse_data_type(&mut self) -> Result { - let (ty, trailing_bracket) = self.parse_data_type_helper()?; - if trailing_bracket.0 { - return parser_err!( - format!("unmatched > after parsing data type {ty}"), - self.peek_token() - ); - } - - Ok(ty) - } - - fn parse_data_type_helper( - &mut self, - ) -> Result<(DataType, MatchedTrailingBracket), ParserError> { - let next_token = self.next_token(); - let mut trailing_bracket: MatchedTrailingBracket = false.into(); - let mut data = match next_token.token { - Token::Word(w) => match w.keyword { - Keyword::BOOLEAN => Ok(DataType::Boolean), - Keyword::BOOL => Ok(DataType::Bool), - Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), - Keyword::REAL => Ok(DataType::Real), - Keyword::FLOAT4 => Ok(DataType::Float4), - Keyword::FLOAT32 => Ok(DataType::Float32), - Keyword::FLOAT64 => Ok(DataType::Float64), - Keyword::FLOAT8 => Ok(DataType::Float8), - Keyword::DOUBLE => { - if self.parse_keyword(Keyword::PRECISION) { - Ok(DataType::DoublePrecision) - } else { - Ok(DataType::Double) - } - } - Keyword::TINYINT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedTinyInt(optional_precision?)) - } else { - Ok(DataType::TinyInt(optional_precision?)) - } - } - Keyword::INT2 => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt2(optional_precision?)) - } else { - Ok(DataType::Int2(optional_precision?)) - } - } - Keyword::SMALLINT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedSmallInt(optional_precision?)) - } else { - Ok(DataType::SmallInt(optional_precision?)) - } - } - Keyword::MEDIUMINT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedMediumInt(optional_precision?)) - } else { - Ok(DataType::MediumInt(optional_precision?)) - } - } - Keyword::INT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt(optional_precision?)) - } else { - Ok(DataType::Int(optional_precision?)) - } - } - Keyword::INT4 => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt4(optional_precision?)) - } else { - Ok(DataType::Int4(optional_precision?)) - } - } - Keyword::INT8 => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInt8(optional_precision?)) - } else { - 
Ok(DataType::Int8(optional_precision?)) - } - } - Keyword::INT16 => Ok(DataType::Int16), - Keyword::INT32 => Ok(DataType::Int32), - Keyword::INT64 => Ok(DataType::Int64), - Keyword::INT128 => Ok(DataType::Int128), - Keyword::INT256 => Ok(DataType::Int256), - Keyword::INTEGER => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedInteger(optional_precision?)) - } else { - Ok(DataType::Integer(optional_precision?)) - } - } - Keyword::BIGINT => { - let optional_precision = self.parse_optional_precision(); - if self.parse_keyword(Keyword::UNSIGNED) { - Ok(DataType::UnsignedBigInt(optional_precision?)) - } else { - Ok(DataType::BigInt(optional_precision?)) - } - } - Keyword::UINT8 => Ok(DataType::UInt8), - Keyword::UINT16 => Ok(DataType::UInt16), - Keyword::UINT32 => Ok(DataType::UInt32), - Keyword::UINT64 => Ok(DataType::UInt64), - Keyword::UINT128 => Ok(DataType::UInt128), - Keyword::UINT256 => Ok(DataType::UInt256), - Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)), - Keyword::NVARCHAR => { - Ok(DataType::Nvarchar(self.parse_optional_character_length()?)) - } - Keyword::CHARACTER => { - if self.parse_keyword(Keyword::VARYING) { - Ok(DataType::CharacterVarying( - self.parse_optional_character_length()?, - )) - } else if self.parse_keywords(&[Keyword::LARGE, Keyword::OBJECT]) { - Ok(DataType::CharacterLargeObject( - self.parse_optional_precision()?, - )) - } else { - Ok(DataType::Character(self.parse_optional_character_length()?)) - } - } - Keyword::CHAR => { - if self.parse_keyword(Keyword::VARYING) { - Ok(DataType::CharVarying( - self.parse_optional_character_length()?, - )) - } else if self.parse_keywords(&[Keyword::LARGE, Keyword::OBJECT]) { - Ok(DataType::CharLargeObject(self.parse_optional_precision()?)) - } else { - Ok(DataType::Char(self.parse_optional_character_length()?)) - } - } - Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)), - Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)), - Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)), - Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)), - Keyword::TINYBLOB => Ok(DataType::TinyBlob), - Keyword::MEDIUMBLOB => Ok(DataType::MediumBlob), - Keyword::LONGBLOB => Ok(DataType::LongBlob), - Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), - Keyword::BIT => { - if self.parse_keyword(Keyword::VARYING) { - Ok(DataType::BitVarying(self.parse_optional_precision()?)) - } else { - Ok(DataType::Bit(self.parse_optional_precision()?)) - } - } - Keyword::UUID => Ok(DataType::Uuid), - Keyword::DATE => Ok(DataType::Date), - Keyword::DATE32 => Ok(DataType::Date32), - Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)), - Keyword::DATETIME64 => { - self.prev_token(); - let (precision, time_zone) = self.parse_datetime_64()?; - Ok(DataType::Datetime64(precision, time_zone)) - } - Keyword::TIMESTAMP => { - let precision = self.parse_optional_precision()?; - let tz = if self.parse_keyword(Keyword::WITH) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - TimezoneInfo::WithTimeZone - } else if self.parse_keyword(Keyword::WITHOUT) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - TimezoneInfo::WithoutTimeZone - } else { - TimezoneInfo::None - }; - Ok(DataType::Timestamp(precision, tz)) - } - Keyword::TIMESTAMPTZ => Ok(DataType::Timestamp( - self.parse_optional_precision()?, - 
TimezoneInfo::Tz, - )), - Keyword::TIME => { - let precision = self.parse_optional_precision()?; - let tz = if self.parse_keyword(Keyword::WITH) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - TimezoneInfo::WithTimeZone - } else if self.parse_keyword(Keyword::WITHOUT) { - self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; - TimezoneInfo::WithoutTimeZone - } else { - TimezoneInfo::None - }; - Ok(DataType::Time(precision, tz)) - } - Keyword::TIMETZ => Ok(DataType::Time( - self.parse_optional_precision()?, - TimezoneInfo::Tz, - )), - // Interval types can be followed by a complicated interval - // qualifier that we don't currently support. See - // parse_interval for a taste. - Keyword::INTERVAL => Ok(DataType::Interval), - Keyword::JSON => Ok(DataType::JSON), - Keyword::JSONB => Ok(DataType::JSONB), - Keyword::REGCLASS => Ok(DataType::Regclass), - Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)), - Keyword::FIXEDSTRING => { - self.expect_token(&Token::LParen)?; - let character_length = self.parse_literal_uint()?; - self.expect_token(&Token::RParen)?; - Ok(DataType::FixedString(character_length)) - } - Keyword::TEXT => Ok(DataType::Text), - Keyword::TINYTEXT => Ok(DataType::TinyText), - Keyword::MEDIUMTEXT => Ok(DataType::MediumText), - Keyword::LONGTEXT => Ok(DataType::LongText), - Keyword::BYTEA => Ok(DataType::Bytea), - Keyword::NUMERIC => Ok(DataType::Numeric( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::DECIMAL => Ok(DataType::Decimal( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::DEC => Ok(DataType::Dec( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::BIGNUMERIC => Ok(DataType::BigNumeric( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::BIGDECIMAL => Ok(DataType::BigDecimal( - self.parse_exact_number_optional_precision_scale()?, - )), - Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)), - Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))), - Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))), - Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), - Keyword::ARRAY => { - if dialect_of!(self is SnowflakeDialect) { - Ok(DataType::Array(ArrayElemTypeDef::None)) - } else if dialect_of!(self is ClickHouseDialect) { - Ok(self.parse_sub_type(|internal_type| { - DataType::Array(ArrayElemTypeDef::Parenthesis(internal_type)) - })?) 
- } else { - self.expect_token(&Token::Lt)?; - let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?; - trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?; - Ok(DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( - inside_type, - )))) - } - } - Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => { - self.prev_token(); - let field_defs = self.parse_duckdb_struct_type_def()?; - Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses)) - } - Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { - self.prev_token(); - let (field_defs, _trailing_bracket) = - self.parse_struct_type_def(Self::parse_struct_field_def)?; - trailing_bracket = _trailing_bracket; - Ok(DataType::Struct( - field_defs, - StructBracketKind::AngleBrackets, - )) - } - Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => { - self.prev_token(); - let fields = self.parse_union_type_def()?; - Ok(DataType::Union(fields)) - } - Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - Ok(self.parse_sub_type(DataType::Nullable)?) - } - Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - Ok(self.parse_sub_type(DataType::LowCardinality)?) - } - Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - self.prev_token(); - let (key_data_type, value_data_type) = self.parse_click_house_map_def()?; - Ok(DataType::Map( - Box::new(key_data_type), - Box::new(value_data_type), - )) - } - Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - self.expect_token(&Token::LParen)?; - let field_defs = self.parse_comma_separated(Parser::parse_column_def)?; - self.expect_token(&Token::RParen)?; - Ok(DataType::Nested(field_defs)) - } - Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { - self.prev_token(); - let field_defs = self.parse_click_house_tuple_def()?; - Ok(DataType::Tuple(field_defs)) - } - Keyword::TRIGGER => Ok(DataType::Trigger), - _ => { - self.prev_token(); - let type_name = self.parse_object_name(false)?; - if let Some(modifiers) = self.parse_optional_type_modifiers()? { - Ok(DataType::Custom(type_name, modifiers)) - } else { - Ok(DataType::Custom(type_name, vec![])) - } - } - }, - _ => self.expected("a data type name", next_token), - }?; - - // Parse array data types. Note: this is postgresql-specific and different from - // Keyword::ARRAY syntax from above - while self.consume_token(&Token::LBracket) { - let size = if dialect_of!(self is GenericDialect | DuckDbDialect | PostgreSqlDialect) { - self.maybe_parse(|p| p.parse_literal_uint())? 
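A sketch of parse_data_type covering a precision/scale type and the PostgreSQL-style square-bracket array form handled in the loop here, assuming the GenericDialect:

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let dialect = GenericDialect {};
        // Exercises parse_exact_number_optional_precision_scale.
        let mut p = Parser::new(&dialect).try_with_sql("NUMERIC(10, 2)").unwrap();
        println!("{:?}", p.parse_data_type().unwrap());

        // A trailing [] is the square-bracket array notation, as opposed to
        // the ARRAY keyword form handled above.
        let mut p = Parser::new(&dialect).try_with_sql("BIGINT[]").unwrap();
        println!("{:?}", p.parse_data_type().unwrap());
    }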
- } else { - None - }; - self.expect_token(&Token::RBracket)?; - data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data), size)) - } - Ok((data, trailing_bracket)) - } - - pub fn parse_string_values(&mut self) -> Result, ParserError> { - self.expect_token(&Token::LParen)?; - let mut values = Vec::new(); - loop { - let next_token = self.next_token(); - match next_token.token { - Token::SingleQuotedString(value) => values.push(value), - _ => self.expected("a string", next_token)?, - } - let next_token = self.next_token(); - match next_token.token { - Token::Comma => (), - Token::RParen => break, - _ => self.expected(", or }", next_token)?, - } - } - Ok(values) - } - - /// Strictly parse `identifier AS identifier` - pub fn parse_identifier_with_alias(&mut self) -> Result { - let ident = self.parse_identifier(false)?; - self.expect_keyword(Keyword::AS)?; - let alias = self.parse_identifier(false)?; - Ok(IdentWithAlias { ident, alias }) - } - - /// Parse `AS identifier` (or simply `identifier` if it's not a reserved keyword) - /// Some examples with aliases: `SELECT 1 foo`, `SELECT COUNT(*) AS cnt`, - /// `SELECT ... FROM t1 foo, t2 bar`, `SELECT ... FROM (...) AS bar` - pub fn parse_optional_alias( - &mut self, - reserved_kwds: &[Keyword], - ) -> Result, ParserError> { - let after_as = self.parse_keyword(Keyword::AS); - let next_token = self.next_token(); - match next_token.token { - // Accept any identifier after `AS` (though many dialects have restrictions on - // keywords that may appear here). If there's no `AS`: don't parse keywords, - // which may start a construct allowed in this position, to be parsed as aliases. - // (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword, - // not an alias.) - Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => { - Ok(Some(w.to_ident(next_token.span))) - } - // MSSQL supports single-quoted strings as aliases for columns - // We accept them as table aliases too, although MSSQL does not. - // - // Note, that this conflicts with an obscure rule from the SQL - // standard, which we don't implement: - // https://crate.io/docs/sql-99/en/latest/chapters/07.html#character-string-literal-s - // "[Obscure Rule] SQL allows you to break a long up into two or more smaller s, split by a that includes a newline - // character. When it sees such a , your DBMS will - // ignore the and treat the multiple strings as - // a single ." - Token::SingleQuotedString(s) => Ok(Some(Ident::with_quote('\'', s))), - // Support for MySql dialect double-quoted string, `AS "HOUR"` for example - Token::DoubleQuotedString(s) => Ok(Some(Ident::with_quote('\"', s))), - _ => { - if after_as { - return self.expected("an identifier after AS", next_token); - } - self.prev_token(); - Ok(None) // no alias found - } - } - } - - /// Parse `AS identifier` when the AS is describing a table-valued object, - /// like in `... FROM generate_series(1, 10) AS t (col)`. In this case - /// the alias is allowed to optionally name the columns in the table, in - /// addition to the table itself. - pub fn parse_optional_table_alias( - &mut self, - reserved_kwds: &[Keyword], - ) -> Result, ParserError> { - match self.parse_optional_alias(reserved_kwds)? 
-        {
-            Some(name) => {
-                let columns = self.parse_table_alias_column_defs()?;
-                Ok(Some(TableAlias { name, columns }))
-            }
-            None => Ok(None),
-        }
-    }
-
-    pub fn parse_optional_group_by(&mut self) -> Result<Option<GroupByExpr>, ParserError> {
-        if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) {
-            let expressions = if self.parse_keyword(Keyword::ALL) {
-                None
-            } else {
-                Some(self.parse_comma_separated(Parser::parse_group_by_expr)?)
-            };
-
-            let mut modifiers = vec![];
-            if dialect_of!(self is ClickHouseDialect | GenericDialect) {
-                loop {
-                    if !self.parse_keyword(Keyword::WITH) {
-                        break;
-                    }
-                    let keyword = self.expect_one_of_keywords(&[
-                        Keyword::ROLLUP,
-                        Keyword::CUBE,
-                        Keyword::TOTALS,
-                    ])?;
-                    modifiers.push(match keyword {
-                        Keyword::ROLLUP => GroupByWithModifier::Rollup,
-                        Keyword::CUBE => GroupByWithModifier::Cube,
-                        Keyword::TOTALS => GroupByWithModifier::Totals,
-                        _ => {
-                            return parser_err!(
-                                "BUG: expected to match GroupBy modifier keyword",
-                                self.peek_token().span.start
-                            )
-                        }
-                    });
-                }
-            }
-            let group_by = match expressions {
-                None => GroupByExpr::All(modifiers),
-                Some(exprs) => GroupByExpr::Expressions(exprs, modifiers),
-            };
-            Ok(Some(group_by))
-        } else {
-            Ok(None)
-        }
-    }
-
-    pub fn parse_optional_order_by(&mut self) -> Result<Option<OrderBy>, ParserError> {
-        if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
-            let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?;
-            let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) {
-                self.parse_interpolations()?
-            } else {
-                None
-            };
-
-            Ok(Some(OrderBy {
-                exprs: order_by_exprs,
-                interpolate,
-            }))
-        } else {
-            Ok(None)
-        }
-    }
-
-    /// Parse a possibly qualified, possibly quoted identifier, e.g.
-    /// `foo` or `myschema."table"`
-    ///
-    /// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
-    /// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
-    /// in this context on BigQuery.
-    pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result<ObjectName, ParserError> {
-        let mut idents = vec![];
-        loop {
-            if self.dialect.supports_object_name_double_dot_notation()
-                && idents.len() == 1
-                && self.consume_token(&Token::Period)
-            {
-                // Empty string here means default schema
-                idents.push(Ident::new(""));
-            }
-            idents.push(self.parse_identifier(in_table_clause)?);
-            if !self.consume_token(&Token::Period) {
-                break;
-            }
-        }
-
-        // BigQuery accepts any number of quoted identifiers of a table name.
-        // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers
-        if dialect_of!(self is BigQueryDialect)
-            && idents.iter().any(|ident| ident.value.contains('.'))
-        {
-            idents = idents
-                .into_iter()
-                .flat_map(|ident| {
-                    ident
-                        .value
-                        .split('.')
-                        .map(|value| Ident {
-                            value: value.into(),
-                            quote_style: ident.quote_style,
-                            span: ident.span,
-                        })
-                        .collect::<Vec<_>>()
-                })
-                .collect()
-        }
-
-        Ok(ObjectName(idents))
-    }
-
-    /// Parse identifiers
-    pub fn parse_identifiers(&mut self) -> Result<Vec<Ident>, ParserError> {
-        let mut idents = vec![];
-        loop {
-            match self.peek_token().token {
-                Token::Word(w) => {
-                    idents.push(w.to_ident(self.peek_token().span));
-                }
-                Token::EOF | Token::Eq => break,
-                _ => {}
-            }
-            self.next_token();
-        }
-        Ok(idents)
-    }
-
-    /// Parse identifiers of form ident1[.identN]*
-    ///
-    /// Similar in functionality to [parse_identifiers], the difference being that this
-    /// function is much stricter about what constitutes a valid multipart identifier: it
-    /// fails on extraneous tokens instead of skipping over them.
-    ///
-    /// For example:
-    ///
-    /// ```rust
-    /// use sqlparser::ast::Ident;
-    /// use sqlparser::dialect::GenericDialect;
-    /// use sqlparser::parser::Parser;
-    ///
-    /// let dialect = GenericDialect {};
-    /// let expected = vec![Ident::new("one"), Ident::new("two")];
-    ///
-    /// // expected usage
-    /// let sql = "one.two";
-    /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
-    /// let actual = parser.parse_multipart_identifier().unwrap();
-    /// assert_eq!(&actual, &expected);
-    ///
-    /// // parse_identifiers is more loose on what it allows, parsing successfully
-    /// let sql = "one + two";
-    /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
-    /// let actual = parser.parse_identifiers().unwrap();
-    /// assert_eq!(&actual, &expected);
-    ///
-    /// // expected to strictly fail due to + separator
-    /// let sql = "one + two";
-    /// let mut parser = Parser::new(&dialect).try_with_sql(sql).unwrap();
-    /// let actual = parser.parse_multipart_identifier().unwrap_err();
-    /// assert_eq!(
-    ///     actual.to_string(),
-    ///     "sql parser error: Unexpected token in identifier: +"
-    /// );
-    /// ```
-    ///
-    /// [parse_identifiers]: Parser::parse_identifiers
-    pub fn parse_multipart_identifier(&mut self) -> Result<Vec<Ident>, ParserError> {
-        let mut idents = vec![];
-
-        // expecting at least one word for identifier
-        let next_token = self.next_token();
-        match next_token.token {
-            Token::Word(w) => idents.push(w.to_ident(next_token.span)),
-            Token::EOF => {
-                return Err(ParserError::ParserError(
-                    "Empty input when parsing identifier".to_string(),
-                ))?
-            }
-            token => {
-                return Err(ParserError::ParserError(format!(
-                    "Unexpected token in identifier: {token}"
-                )))?
-            }
-        };
-
-        // parse optional next parts if exist
-        loop {
-            match self.next_token().token {
-                // ensure that optional period is succeeded by another identifier
-                Token::Period => {
-                    let next_token = self.next_token();
-                    match next_token.token {
-                        Token::Word(w) => idents.push(w.to_ident(next_token.span)),
-                        Token::EOF => {
-                            return Err(ParserError::ParserError(
-                                "Trailing period in identifier".to_string(),
-                            ))?
-                        }
-                        token => {
-                            return Err(ParserError::ParserError(format!(
-                                "Unexpected token following period in identifier: {token}"
-                            )))?
-                        }
-                    }
-                }
-                Token::EOF => break,
-                token => {
-                    return Err(ParserError::ParserError(format!(
-                        "Unexpected token in identifier: {token}"
-                    )))?
-                }
-            }
-        }
-
-        Ok(idents)
-    }
-
-    /// Parse a simple one-word identifier (possibly quoted, possibly a keyword)
-    ///
-    /// The `in_table_clause` parameter indicates whether the identifier is a table in a FROM, JOIN, or
-    /// similar table clause. Currently, this is used only to support unquoted hyphenated identifiers in
-    /// this context on BigQuery.
-    pub fn parse_identifier(&mut self, in_table_clause: bool) -> Result<Ident, ParserError> {
-        let next_token = self.next_token();
-        match next_token.token {
-            Token::Word(w) => {
-                let mut ident = w.to_ident(next_token.span);
-
-                // On BigQuery, hyphens are permitted in unquoted identifiers inside of a FROM or
-                // TABLE clause [0].
-                //
-                // The first segment must be an ordinary unquoted identifier, e.g. it must not start
-                // with a digit. Subsequent segments must either be valid identifiers or
-                // integers, e.g. foo-123 is allowed, but foo-123a is not.
-                //
-                // [0] https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical
-                if dialect_of!(self is BigQueryDialect)
-                    && w.quote_style.is_none()
-                    && in_table_clause
-                {
-                    let mut requires_whitespace = false;
-                    while matches!(self.peek_token_no_skip().token, Token::Minus) {
-                        self.next_token();
-                        ident.value.push('-');
-
-                        let token = self
-                            .next_token_no_skip()
-                            .cloned()
-                            .unwrap_or(TokenWithSpan::wrap(Token::EOF));
-                        requires_whitespace = match token.token {
-                            Token::Word(next_word) if next_word.quote_style.is_none() => {
-                                ident.value.push_str(&next_word.value);
-                                false
-                            }
-                            Token::Number(s, false) if s.chars().all(|c| c.is_ascii_digit()) => {
-                                ident.value.push_str(&s);
-                                true
-                            }
-                            _ => {
-                                return self
-                                    .expected("continuation of hyphenated identifier", token);
-                            }
-                        }
-                    }
-
-                    // If the last segment was a number, we must check that it's followed by whitespace,
-                    // otherwise foo-123a will be parsed as `foo-123` with the alias `a`.
-                    if requires_whitespace {
-                        let token = self.next_token();
-                        if !matches!(token.token, Token::EOF | Token::Whitespace(_)) {
-                            return self
-                                .expected("whitespace following hyphenated identifier", token);
-                        }
-                    }
-                }
-                Ok(ident)
-            }
-            Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)),
-            Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)),
-            _ => self.expected("identifier", next_token),
-        }
-    }
-
-    /// Parses a parenthesized, comma-separated list of column definitions within a view.
-    fn parse_view_columns(&mut self) -> Result<Vec<ViewColumnDef>, ParserError> {
-        if self.consume_token(&Token::LParen) {
-            if self.peek_token().token == Token::RParen {
-                self.next_token();
-                Ok(vec![])
-            } else {
-                let cols = self.parse_comma_separated(Parser::parse_view_column)?;
-                self.expect_token(&Token::RParen)?;
-                Ok(cols)
-            }
-        } else {
-            Ok(vec![])
-        }
-    }
-
-    /// Parses a column definition within a view.
-    fn parse_view_column(&mut self) -> Result<ViewColumnDef, ParserError> {
-        let name = self.parse_identifier(false)?;
-        let options = if (dialect_of!(self is BigQueryDialect | GenericDialect)
-            && self.parse_keyword(Keyword::OPTIONS))
-            || (dialect_of!(self is SnowflakeDialect | GenericDialect)
-                && self.parse_keyword(Keyword::COMMENT))
-        {
-            self.prev_token();
-            self.parse_optional_column_option()?
-                .map(|option| vec![option])
-        } else {
-            None
-        };
-        let data_type = if dialect_of!(self is ClickHouseDialect) {
-            Some(self.parse_data_type()?)
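The hyphenated-identifier rule above is easiest to see through a full statement. A sketch assuming the BigQueryDialect, with an invented project-qualified table name:

    use sqlparser::dialect::BigQueryDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // my-project is one hyphenated identifier; it is accepted only because
        // it appears in a table position (in_table_clause == true).
        let sql = "SELECT * FROM my-project.dataset1.table1";
        let stmts = Parser::parse_sql(&BigQueryDialect {}, sql).expect("should parse");
        println!("{}", stmts[0]);
    }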
- } else { - None - }; - Ok(ViewColumnDef { - name, - data_type, - options, - }) - } - - /// Parse a parenthesized comma-separated list of unqualified, possibly quoted identifiers - pub fn parse_parenthesized_column_list( - &mut self, - optional: IsOptional, - allow_empty: bool, - ) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { - if allow_empty && self.peek_token().token == Token::RParen { - self.next_token(); - Ok(vec![]) - } else { - let cols = self.parse_comma_separated(|p| p.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - Ok(cols) - } - } else if optional == Optional { - Ok(vec![]) - } else { - self.expected("a list of columns in parentheses", self.peek_token()) - } - } - - /// Parse a parenthesized comma-separated list of table alias column definitions. - fn parse_table_alias_column_defs(&mut self) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { - let cols = self.parse_comma_separated(|p| { - let name = p.parse_identifier(false)?; - let data_type = p.maybe_parse(|p| p.parse_data_type())?; - Ok(TableAliasColumnDef { name, data_type }) - })?; - self.expect_token(&Token::RParen)?; - Ok(cols) - } else { - Ok(vec![]) - } - } - - pub fn parse_precision(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let n = self.parse_literal_uint()?; - self.expect_token(&Token::RParen)?; - Ok(n) - } - - pub fn parse_optional_precision(&mut self) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { - let n = self.parse_literal_uint()?; - self.expect_token(&Token::RParen)?; - Ok(Some(n)) - } else { - Ok(None) - } - } - - /// Parse datetime64 [1] - /// Syntax - /// ```sql - /// DateTime64(precision[, timezone]) - /// ``` - /// - /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 - pub fn parse_datetime_64(&mut self) -> Result<(u64, Option), ParserError> { - self.expect_keyword(Keyword::DATETIME64)?; - self.expect_token(&Token::LParen)?; - let precision = self.parse_literal_uint()?; - let time_zone = if self.consume_token(&Token::Comma) { - Some(self.parse_literal_string()?) - } else { - None - }; - self.expect_token(&Token::RParen)?; - Ok((precision, time_zone)) - } - - pub fn parse_optional_character_length( - &mut self, - ) -> Result, ParserError> { - if self.consume_token(&Token::LParen) { - let character_length = self.parse_character_length()?; - self.expect_token(&Token::RParen)?; - Ok(Some(character_length)) - } else { - Ok(None) - } - } - - pub fn parse_character_length(&mut self) -> Result { - if self.parse_keyword(Keyword::MAX) { - return Ok(CharacterLength::Max); - } - let length = self.parse_literal_uint()?; - let unit = if self.parse_keyword(Keyword::CHARACTERS) { - Some(CharLengthUnits::Characters) - } else if self.parse_keyword(Keyword::OCTETS) { - Some(CharLengthUnits::Octets) - } else { - None - }; - Ok(CharacterLength::IntegerLength { length, unit }) - } - - pub fn parse_optional_precision_scale( - &mut self, - ) -> Result<(Option, Option), ParserError> { - if self.consume_token(&Token::LParen) { - let n = self.parse_literal_uint()?; - let scale = if self.consume_token(&Token::Comma) { - Some(self.parse_literal_uint()?) 
- } else { - None - }; - self.expect_token(&Token::RParen)?; - Ok((Some(n), scale)) - } else { - Ok((None, None)) - } - } - - pub fn parse_exact_number_optional_precision_scale( - &mut self, - ) -> Result { - if self.consume_token(&Token::LParen) { - let precision = self.parse_literal_uint()?; - let scale = if self.consume_token(&Token::Comma) { - Some(self.parse_literal_uint()?) - } else { - None - }; - - self.expect_token(&Token::RParen)?; - - match scale { - None => Ok(ExactNumberInfo::Precision(precision)), - Some(scale) => Ok(ExactNumberInfo::PrecisionAndScale(precision, scale)), - } - } else { - Ok(ExactNumberInfo::None) - } - } - - pub fn parse_optional_type_modifiers(&mut self) -> Result>, ParserError> { - if self.consume_token(&Token::LParen) { - let mut modifiers = Vec::new(); - loop { - let next_token = self.next_token(); - match next_token.token { - Token::Word(w) => modifiers.push(w.to_string()), - Token::Number(n, _) => modifiers.push(n), - Token::SingleQuotedString(s) => modifiers.push(s), - - Token::Comma => { - continue; - } - Token::RParen => { - break; - } - _ => self.expected("type modifiers", next_token)?, - } - } - - Ok(Some(modifiers)) - } else { - Ok(None) - } - } - - /// Parse a parenthesized sub data type - fn parse_sub_type(&mut self, parent_type: F) -> Result - where - F: FnOnce(Box) -> DataType, - { - self.expect_token(&Token::LParen)?; - let inside_type = self.parse_data_type()?; - self.expect_token(&Token::RParen)?; - Ok(parent_type(inside_type.into())) - } - - pub fn parse_delete(&mut self) -> Result { - let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { - // `FROM` keyword is optional in BigQuery SQL. - // https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement - if dialect_of!(self is BigQueryDialect | GenericDialect) { - (vec![], false) - } else { - let tables = self.parse_comma_separated(|p| p.parse_object_name(false))?; - self.expect_keyword(Keyword::FROM)?; - (tables, true) - } - } else { - (vec![], true) - }; - - let from = self.parse_comma_separated(Parser::parse_table_and_joins)?; - let using = if self.parse_keyword(Keyword::USING) { - Some(self.parse_comma_separated(Parser::parse_table_and_joins)?) - } else { - None - }; - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - let returning = if self.parse_keyword(Keyword::RETURNING) { - Some(self.parse_comma_separated(Parser::parse_select_item)?) - } else { - None - }; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; - let limit = if self.parse_keyword(Keyword::LIMIT) { - self.parse_limit()? 
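A sketch of the DELETE shape handled by parse_delete above, assuming the PostgreSqlDialect and invented table names; USING, WHERE, and RETURNING map onto the fields collected here:

    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "DELETE FROM t USING u WHERE t.id = u.id RETURNING t.id";
        let stmts = Parser::parse_sql(&PostgreSqlDialect {}, sql).expect("should parse");
        println!("{}", stmts[0]);
    }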
- } else { - None - }; - - Ok(Statement::Delete(Delete { - tables, - from: if with_from_keyword { - FromTable::WithFromKeyword(from) - } else { - FromTable::WithoutKeyword(from) - }, - using, - selection, - returning, - order_by, - limit, - })) - } - - // KILL [CONNECTION | QUERY | MUTATION] processlist_id - pub fn parse_kill(&mut self) -> Result { - let modifier_keyword = - self.parse_one_of_keywords(&[Keyword::CONNECTION, Keyword::QUERY, Keyword::MUTATION]); - - let id = self.parse_literal_uint()?; - - let modifier = match modifier_keyword { - Some(Keyword::CONNECTION) => Some(KillType::Connection), - Some(Keyword::QUERY) => Some(KillType::Query), - Some(Keyword::MUTATION) => { - if dialect_of!(self is ClickHouseDialect | GenericDialect) { - Some(KillType::Mutation) - } else { - self.expected( - "Unsupported type for KILL, allowed: CONNECTION | QUERY", - self.peek_token(), - )? - } - } - _ => None, - }; - - Ok(Statement::Kill { modifier, id }) - } - - pub fn parse_explain( - &mut self, - describe_alias: DescribeAlias, - ) -> Result { - let mut analyze = false; - let mut verbose = false; - let mut query_plan = false; - let mut format = None; - let mut options = None; - - // Note: DuckDB is compatible with PostgreSQL syntax for this statement, - // although not all features may be implemented. - if describe_alias == DescribeAlias::Explain - && self.dialect.supports_explain_with_utility_options() - && self.peek_token().token == Token::LParen - { - options = Some(self.parse_utility_options()?) - } else if self.parse_keywords(&[Keyword::QUERY, Keyword::PLAN]) { - query_plan = true; - } else { - analyze = self.parse_keyword(Keyword::ANALYZE); - verbose = self.parse_keyword(Keyword::VERBOSE); - if self.parse_keyword(Keyword::FORMAT) { - format = Some(self.parse_analyze_format()?); - } - } - - match self.maybe_parse(|parser| parser.parse_statement())? { - Some(Statement::Explain { .. }) | Some(Statement::ExplainTable { .. }) => Err( - ParserError::ParserError("Explain must be root of the plan".to_string()), - ), - Some(statement) => Ok(Statement::Explain { - describe_alias, - analyze, - verbose, - query_plan, - statement: Box::new(statement), - format, - options, - }), - _ => { - let hive_format = - match self.parse_one_of_keywords(&[Keyword::EXTENDED, Keyword::FORMATTED]) { - Some(Keyword::EXTENDED) => Some(HiveDescribeFormat::Extended), - Some(Keyword::FORMATTED) => Some(HiveDescribeFormat::Formatted), - _ => None, - }; - - let has_table_keyword = if self.dialect.describe_requires_table_keyword() { - // only allow to use TABLE keyword for DESC|DESCRIBE statement - self.parse_keyword(Keyword::TABLE) - } else { - false - }; - - let table_name = self.parse_object_name(false)?; - Ok(Statement::ExplainTable { - describe_alias, - hive_format, - has_table_keyword, - table_name, - }) - } - } - } - - /// Parse a query expression, i.e. a `SELECT` statement optionally - /// preceded with some `WITH` CTE declarations and optionally followed - /// by `ORDER BY`. Unlike some other parse_... 
methods, this one doesn't
-    /// expect the initial keyword to be already consumed
-    pub fn parse_query(&mut self) -> Result<Box<Query>, ParserError> {
-        let _guard = self.recursion_counter.try_decrease()?;
-        let with = if let Some(with_token) = self.parse_keyword_token(Keyword::WITH) {
-            Some(With {
-                with_token: with_token.into(),
-                recursive: self.parse_keyword(Keyword::RECURSIVE),
-                cte_tables: self.parse_comma_separated(Parser::parse_cte)?,
-            })
-        } else {
-            None
-        };
-        if self.parse_keyword(Keyword::INSERT) {
-            Ok(Query {
-                with,
-                body: self.parse_insert_setexpr_boxed()?,
-                limit: None,
-                limit_by: vec![],
-                order_by: None,
-                offset: None,
-                fetch: None,
-                locks: vec![],
-                for_clause: None,
-                settings: None,
-                format_clause: None,
-            }
-            .into())
-        } else if self.parse_keyword(Keyword::UPDATE) {
-            Ok(Query {
-                with,
-                body: self.parse_update_setexpr_boxed()?,
-                limit: None,
-                limit_by: vec![],
-                order_by: None,
-                offset: None,
-                fetch: None,
-                locks: vec![],
-                for_clause: None,
-                settings: None,
-                format_clause: None,
-            }
-            .into())
-        } else {
-            let body = self.parse_query_body(self.dialect.prec_unknown())?;
-
-            let order_by = self.parse_optional_order_by()?;
-
-            let mut limit = None;
-            let mut offset = None;
-
-            for _x in 0..2 {
-                if limit.is_none() && self.parse_keyword(Keyword::LIMIT) {
-                    limit = self.parse_limit()?
-                }
-
-                if offset.is_none() && self.parse_keyword(Keyword::OFFSET) {
-                    offset = Some(self.parse_offset()?)
-                }
-
-                if self.dialect.supports_limit_comma()
-                    && limit.is_some()
-                    && offset.is_none()
-                    && self.consume_token(&Token::Comma)
-                {
-                    // MySQL style LIMIT x,y => LIMIT y OFFSET x.
-                    // Check the MySQL docs for more details.
-                    offset = Some(Offset {
-                        value: limit.unwrap(),
-                        rows: OffsetRows::None,
-                    });
-                    limit = Some(self.parse_expr()?);
-                }
-            }
-
-            let limit_by = if dialect_of!(self is ClickHouseDialect | GenericDialect)
-                && self.parse_keyword(Keyword::BY)
-            {
-                self.parse_comma_separated(Parser::parse_expr)?
-            } else {
-                vec![]
-            };
-
-            let settings = self.parse_settings()?;
-
-            let fetch = if self.parse_keyword(Keyword::FETCH) {
-                Some(self.parse_fetch()?)
-            } else {
-                None
-            };
-
-            let mut for_clause = None;
-            let mut locks = Vec::new();
-            while self.parse_keyword(Keyword::FOR) {
-                if let Some(parsed_for_clause) = self.parse_for_clause()?
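The LIMIT x,y rewrite above can be observed through the display round-trip. A sketch assuming the MySqlDialect; the assert reflects the swap done in the comma branch, not a separate normalization pass:

    use sqlparser::dialect::MySqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let stmts = Parser::parse_sql(&MySqlDialect {}, "SELECT * FROM t LIMIT 5, 10").unwrap();
        // The comma branch stores 5 as the offset and re-parses 10 as the limit.
        assert_eq!(stmts[0].to_string(), "SELECT * FROM t LIMIT 10 OFFSET 5");
    }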
{ - for_clause = Some(parsed_for_clause); - break; - } else { - locks.push(self.parse_lock()?); - } - } - let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keyword(Keyword::FORMAT) - { - if self.parse_keyword(Keyword::NULL) { - Some(FormatClause::Null) - } else { - let ident = self.parse_identifier(false)?; - Some(FormatClause::Identifier(ident)) - } - } else { - None - }; - - Ok(Query { - with, - body, - order_by, - limit, - limit_by, - offset, - fetch, - locks, - for_clause, - settings, - format_clause, - } - .into()) - } - } - - fn parse_settings(&mut self) -> Result>, ParserError> { - let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::SETTINGS) - { - let key_values = self.parse_comma_separated(|p| { - let key = p.parse_identifier(false)?; - p.expect_token(&Token::Eq)?; - let value = p.parse_value()?; - Ok(Setting { key, value }) - })?; - Some(key_values) - } else { - None - }; - Ok(settings) - } - - /// Parse a mssql `FOR [XML | JSON | BROWSE]` clause - pub fn parse_for_clause(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::XML) { - Ok(Some(self.parse_for_xml()?)) - } else if self.parse_keyword(Keyword::JSON) { - Ok(Some(self.parse_for_json()?)) - } else if self.parse_keyword(Keyword::BROWSE) { - Ok(Some(ForClause::Browse)) - } else { - Ok(None) - } - } - - /// Parse a mssql `FOR XML` clause - pub fn parse_for_xml(&mut self) -> Result { - let for_xml = if self.parse_keyword(Keyword::RAW) { - let mut element_name = None; - if self.peek_token().token == Token::LParen { - self.expect_token(&Token::LParen)?; - element_name = Some(self.parse_literal_string()?); - self.expect_token(&Token::RParen)?; - } - ForXml::Raw(element_name) - } else if self.parse_keyword(Keyword::AUTO) { - ForXml::Auto - } else if self.parse_keyword(Keyword::EXPLICIT) { - ForXml::Explicit - } else if self.parse_keyword(Keyword::PATH) { - let mut element_name = None; - if self.peek_token().token == Token::LParen { - self.expect_token(&Token::LParen)?; - element_name = Some(self.parse_literal_string()?); - self.expect_token(&Token::RParen)?; - } - ForXml::Path(element_name) - } else { - return Err(ParserError::ParserError( - "Expected FOR XML [RAW | AUTO | EXPLICIT | PATH ]".to_string(), - )); - }; - let mut elements = false; - let mut binary_base64 = false; - let mut root = None; - let mut r#type = false; - while self.peek_token().token == Token::Comma { - self.next_token(); - if self.parse_keyword(Keyword::ELEMENTS) { - elements = true; - } else if self.parse_keyword(Keyword::BINARY) { - self.expect_keyword(Keyword::BASE64)?; - binary_base64 = true; - } else if self.parse_keyword(Keyword::ROOT) { - self.expect_token(&Token::LParen)?; - root = Some(self.parse_literal_string()?); - self.expect_token(&Token::RParen)?; - } else if self.parse_keyword(Keyword::TYPE) { - r#type = true; - } - } - Ok(ForClause::Xml { - for_xml, - elements, - binary_base64, - root, - r#type, - }) - } - - /// Parse a mssql `FOR JSON` clause - pub fn parse_for_json(&mut self) -> Result { - let for_json = if self.parse_keyword(Keyword::AUTO) { - ForJson::Auto - } else if self.parse_keyword(Keyword::PATH) { - ForJson::Path - } else { - return Err(ParserError::ParserError( - "Expected FOR JSON [AUTO | PATH ]".to_string(), - )); - }; - let mut root = None; - let mut include_null_values = false; - let mut without_array_wrapper = false; - while self.peek_token().token == Token::Comma { - self.next_token(); - if 
self.parse_keyword(Keyword::ROOT) { - self.expect_token(&Token::LParen)?; - root = Some(self.parse_literal_string()?); - self.expect_token(&Token::RParen)?; - } else if self.parse_keyword(Keyword::INCLUDE_NULL_VALUES) { - include_null_values = true; - } else if self.parse_keyword(Keyword::WITHOUT_ARRAY_WRAPPER) { - without_array_wrapper = true; - } - } - Ok(ForClause::Json { - for_json, - root, - include_null_values, - without_array_wrapper, - }) - } - - /// Parse a CTE (`alias [( col1, col2, ... )] AS (subquery)`) - pub fn parse_cte(&mut self) -> Result { - let name = self.parse_identifier(false)?; - - let mut cte = if self.parse_keyword(Keyword::AS) { - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); - } - } - self.expect_token(&Token::LParen)?; - - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; - - let alias = TableAlias { - name, - columns: vec![], - }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), - } - } else { - let columns = self.parse_table_alias_column_defs()?; - self.expect_keyword(Keyword::AS)?; - let mut is_materialized = None; - if dialect_of!(self is PostgreSqlDialect) { - if self.parse_keyword(Keyword::MATERIALIZED) { - is_materialized = Some(CteAsMaterialized::Materialized); - } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { - is_materialized = Some(CteAsMaterialized::NotMaterialized); - } - } - self.expect_token(&Token::LParen)?; - - let query = self.parse_query()?; - let closing_paren_token = self.expect_token(&Token::RParen)?; - - let alias = TableAlias { name, columns }; - Cte { - alias, - query, - from: None, - materialized: is_materialized, - closing_paren_token: closing_paren_token.into(), - } - }; - if self.parse_keyword(Keyword::FROM) { - cte.from = Some(self.parse_identifier(false)?); - } - Ok(cte) - } - - /// Parse a "query body", which is an expression with roughly the - /// following grammar: - /// ```sql - /// query_body ::= restricted_select | '(' subquery ')' | set_operation - /// restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ] - /// subquery ::= query_body [ order_by_limit ] - /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body - /// ``` - pub fn parse_query_body(&mut self, precedence: u8) -> Result, ParserError> { - // We parse the expression using a Pratt parser, as in `parse_expr()`. - // Start by parsing a restricted SELECT or a `(subquery)`: - let expr = if self.peek_keyword(Keyword::SELECT) { - SetExpr::Select(self.parse_select().map(Box::new)?) - } else if self.consume_token(&Token::LParen) { - // CTEs are not allowed here, but the parser currently accepts them - let subquery = self.parse_query()?; - self.expect_token(&Token::RParen)?; - SetExpr::Query(subquery) - } else if self.parse_keyword(Keyword::VALUES) { - let is_mysql = dialect_of!(self is MySqlDialect); - SetExpr::Values(self.parse_values(is_mysql)?) 
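A sketch of the mssql FOR JSON clause parsed above, assuming the MsSqlDialect and an invented table:

    use sqlparser::dialect::MsSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // PATH selects ForJson::Path; ROOT and INCLUDE_NULL_VALUES are picked up
        // by the comma loop above.
        let sql = "SELECT id FROM t FOR JSON PATH, ROOT('data'), INCLUDE_NULL_VALUES";
        let stmts = Parser::parse_sql(&MsSqlDialect {}, sql).expect("should parse");
        println!("{}", stmts[0]);
    }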
- } else if self.parse_keyword(Keyword::TABLE) { - SetExpr::Table(Box::new(self.parse_as_table()?)) - } else { - return self.expected( - "SELECT, VALUES, or a subquery in the query body", - self.peek_token(), - ); - }; - - self.parse_remaining_set_exprs(expr, precedence) - } - - /// Parse any extra set expressions that may be present in a query body - /// - /// (this is its own function to reduce required stack size in debug builds) - fn parse_remaining_set_exprs( - &mut self, - mut expr: SetExpr, - precedence: u8, - ) -> Result, ParserError> { - loop { - // The query can be optionally followed by a set operator: - let op = self.parse_set_operator(&self.peek_token().token); - let next_precedence = match op { - // UNION and EXCEPT have the same binding power and evaluate left-to-right - Some(SetOperator::Union) | Some(SetOperator::Except) => 10, - // INTERSECT has higher precedence than UNION/EXCEPT - Some(SetOperator::Intersect) => 20, - // Unexpected token or EOF => stop parsing the query body - None => break, - }; - if precedence >= next_precedence { - break; - } - self.next_token(); // skip past the set operator - let set_quantifier = self.parse_set_quantifier(&op); - expr = SetExpr::SetOperation { - left: Box::new(expr), - op: op.unwrap(), - set_quantifier, - right: self.parse_query_body(next_precedence)?, - }; - } - - Ok(expr.into()) - } - - pub fn parse_set_operator(&mut self, token: &Token) -> Option { - match token { - Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), - Token::Word(w) if w.keyword == Keyword::EXCEPT => Some(SetOperator::Except), - Token::Word(w) if w.keyword == Keyword::INTERSECT => Some(SetOperator::Intersect), - _ => None, - } - } - - pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { - match op { - Some(SetOperator::Except | SetOperator::Intersect | SetOperator::Union) => { - if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { - SetQuantifier::DistinctByName - } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { - SetQuantifier::ByName - } else if self.parse_keyword(Keyword::ALL) { - if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { - SetQuantifier::AllByName - } else { - SetQuantifier::All - } - } else if self.parse_keyword(Keyword::DISTINCT) { - SetQuantifier::Distinct - } else { - SetQuantifier::None - } - } - _ => SetQuantifier::None, - } - } - - /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`) - pub fn parse_select(&mut self) -> Result { - let select_token = self.expect_keyword(Keyword::SELECT)?; - let value_table_mode = - if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) { - if self.parse_keyword(Keyword::VALUE) { - Some(ValueTableMode::AsValue) - } else if self.parse_keyword(Keyword::STRUCT) { - Some(ValueTableMode::AsStruct) - } else { - self.expected("VALUE or STRUCT", self.peek_token())? 
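The precedence constants in parse_remaining_set_exprs mean INTERSECT binds tighter than UNION/EXCEPT. A sketch, assuming the GenericDialect, checking the resulting tree shape:

    use sqlparser::ast::{SetExpr, SetOperator, Statement};
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT 1 UNION SELECT 2 INTERSECT SELECT 3";
        let stmts = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
        let Statement::Query(q) = &stmts[0] else { unreachable!() };
        // Expect UNION at the root with the INTERSECT nested on its right.
        if let SetExpr::SetOperation { op, right, .. } = q.body.as_ref() {
            assert_eq!(*op, SetOperator::Union);
            assert!(matches!(
                right.as_ref(),
                SetExpr::SetOperation { op: SetOperator::Intersect, .. }
            ));
        }
    }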
- } - } else { - None - }; - - let mut top_before_distinct = false; - let mut top = None; - if self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { - top = Some(self.parse_top()?); - top_before_distinct = true; - } - let distinct = self.parse_all_or_distinct()?; - if !self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { - top = Some(self.parse_top()?); - } - - let projection = self.parse_projection()?; - - let into = if self.parse_keyword(Keyword::INTO) { - let temporary = self - .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) - .is_some(); - let unlogged = self.parse_keyword(Keyword::UNLOGGED); - let table = self.parse_keyword(Keyword::TABLE); - let name = self.parse_object_name(false)?; - Some(SelectInto { - temporary, - unlogged, - table, - name, - }) - } else { - None - }; - - // Note that for keywords to be properly handled here, they need to be - // added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`, - // otherwise they may be parsed as an alias as part of the `projection` - // or `from`. - - let from = if self.parse_keyword(Keyword::FROM) { - self.parse_comma_separated(Parser::parse_table_and_joins)? - } else { - vec![] - }; - - let mut lateral_views = vec![]; - loop { - if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { - let outer = self.parse_keyword(Keyword::OUTER); - let lateral_view = self.parse_expr()?; - let lateral_view_name = self.parse_object_name(false)?; - let lateral_col_alias = self - .parse_comma_separated(|parser| { - parser.parse_optional_alias(&[ - Keyword::WHERE, - Keyword::GROUP, - Keyword::CLUSTER, - Keyword::HAVING, - Keyword::LATERAL, - ]) // This couldn't possibly be a bad idea - })? - .into_iter() - .flatten() - .collect(); - - lateral_views.push(LateralView { - lateral_view, - lateral_view_name, - lateral_col_alias, - outer, - }); - } else { - break; - } - } - - let prewhere = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::PREWHERE) - { - Some(self.parse_expr()?) - } else { - None - }; - - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - - let group_by = self - .parse_optional_group_by()? - .unwrap_or_else(|| GroupByExpr::Expressions(vec![], vec![])); - - let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let having = if self.parse_keyword(Keyword::HAVING) { - Some(self.parse_expr()?) - } else { - None - }; - - // Accept QUALIFY and WINDOW in any order and flag accordingly. 
- let (named_windows, qualify, window_before_qualify) = if self.parse_keyword(Keyword::WINDOW) - { - let named_windows = self.parse_comma_separated(Parser::parse_named_window)?; - if self.parse_keyword(Keyword::QUALIFY) { - (named_windows, Some(self.parse_expr()?), true) - } else { - (named_windows, None, true) - } - } else if self.parse_keyword(Keyword::QUALIFY) { - let qualify = Some(self.parse_expr()?); - if self.parse_keyword(Keyword::WINDOW) { - ( - self.parse_comma_separated(Parser::parse_named_window)?, - qualify, - false, - ) - } else { - (Default::default(), qualify, false) - } - } else { - Default::default() - }; - - let connect_by = if self.dialect.supports_connect_by() - && self - .parse_one_of_keywords(&[Keyword::START, Keyword::CONNECT]) - .is_some() - { - self.prev_token(); - Some(self.parse_connect_by()?) - } else { - None - }; - - Ok(Select { - select_token: AttachedToken(select_token), - distinct, - top, - top_before_distinct, - projection, - into, - from, - lateral_views, - prewhere, - selection, - group_by, - cluster_by, - distribute_by, - sort_by, - having, - named_window: named_windows, - window_before_qualify, - qualify, - value_table_mode, - connect_by, - }) - } - - /// Invoke `f` after first setting the parser's `ParserState` to `state`. - /// - /// Upon return, restores the parser's state to what it started at. - fn with_state(&mut self, state: ParserState, mut f: F) -> Result - where - F: FnMut(&mut Parser) -> Result, - { - let current_state = self.state; - self.state = state; - let res = f(self); - self.state = current_state; - res - } - - pub fn parse_connect_by(&mut self) -> Result { - let (condition, relationships) = if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) { - let relationships = self.with_state(ParserState::ConnectBy, |parser| { - parser.parse_comma_separated(Parser::parse_expr) - })?; - self.expect_keywords(&[Keyword::START, Keyword::WITH])?; - let condition = self.parse_expr()?; - (condition, relationships) - } else { - self.expect_keywords(&[Keyword::START, Keyword::WITH])?; - let condition = self.parse_expr()?; - self.expect_keywords(&[Keyword::CONNECT, Keyword::BY])?; - let relationships = self.with_state(ParserState::ConnectBy, |parser| { - parser.parse_comma_separated(Parser::parse_expr) - })?; - (condition, relationships) - }; - Ok(ConnectBy { - condition, - relationships, - }) - } - - /// Parse `CREATE TABLE x AS TABLE y` - pub fn parse_as_table(&mut self) -> Result { - let token1 = self.next_token(); - let token2 = self.next_token(); - let token3 = self.next_token(); - - let table_name; - let schema_name; - if token2 == Token::Period { - match token1.token { - Token::Word(w) => { - schema_name = w.value; - } - _ => { - return self.expected("Schema name", token1); - } - } - match token3.token { - Token::Word(w) => { - table_name = w.value; - } - _ => { - return self.expected("Table name", token3); - } - } - Ok(Table { - table_name: Some(table_name), - schema_name: Some(schema_name), - }) - } else { - match token1.token { - Token::Word(w) => { - table_name = w.value; - } - _ => { - return self.expected("Table name", token1); - } - } - Ok(Table { - table_name: Some(table_name), - schema_name: None, - }) - } - } - - /// Parse a `SET ROLE` statement. Expects SET to be consumed already. 
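A sketch of the hierarchical-query form handled by parse_connect_by, assuming the SnowflakeDialect (one of the dialects where supports_connect_by is true) and an invented employees table:

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // Either clause order is accepted; PRIOR is only legal while the
        // parser is in the ConnectBy state set via with_state above.
        let sql = "SELECT employee_id FROM employees \
                   START WITH manager_id IS NULL \
                   CONNECT BY manager_id = PRIOR employee_id";
        let stmts = Parser::parse_sql(&SnowflakeDialect {}, sql).expect("should parse");
        println!("{}", stmts[0]);
    }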
- fn parse_set_role(&mut self, modifier: Option) -> Result { - self.expect_keyword(Keyword::ROLE)?; - let context_modifier = match modifier { - Some(Keyword::LOCAL) => ContextModifier::Local, - Some(Keyword::SESSION) => ContextModifier::Session, - _ => ContextModifier::None, - }; - - let role_name = if self.parse_keyword(Keyword::NONE) { - None - } else { - Some(self.parse_identifier(false)?) - }; - Ok(Statement::SetRole { - context_modifier, - role_name, - }) - } - - pub fn parse_set(&mut self) -> Result { - let modifier = - self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); - if let Some(Keyword::HIVEVAR) = modifier { - self.expect_token(&Token::Colon)?; - } else if let Some(set_role_stmt) = - self.maybe_parse(|parser| parser.parse_set_role(modifier))? - { - return Ok(set_role_stmt); - } - - let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { - OneOrManyWithParens::One(ObjectName(vec!["TIMEZONE".into()])) - } else if self.dialect.supports_parenthesized_set_variables() - && self.consume_token(&Token::LParen) - { - let variables = OneOrManyWithParens::Many( - self.parse_comma_separated(|parser: &mut Parser<'a>| { - parser.parse_identifier(false) - })? - .into_iter() - .map(|ident| ObjectName(vec![ident])) - .collect(), - ); - self.expect_token(&Token::RParen)?; - variables - } else { - OneOrManyWithParens::One(self.parse_object_name(false)?) - }; - - if matches!(&variables, OneOrManyWithParens::One(variable) if variable.to_string().eq_ignore_ascii_case("NAMES") - && dialect_of!(self is MySqlDialect | GenericDialect)) - { - if self.parse_keyword(Keyword::DEFAULT) { - return Ok(Statement::SetNamesDefault {}); - } - - let charset_name = self.parse_literal_string()?; - let collation_name = if self.parse_one_of_keywords(&[Keyword::COLLATE]).is_some() { - Some(self.parse_literal_string()?) - } else { - None - }; - - return Ok(Statement::SetNames { - charset_name, - collation_name, - }); - } - - let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); - - if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - if parenthesized_assignment { - self.expect_token(&Token::LParen)?; - } - - let mut values = vec![]; - loop { - let value = if let Some(expr) = self.try_parse_expr_sub_query()? { - expr - } else if let Ok(expr) = self.parse_expr() { - expr - } else { - self.expected("variable value", self.peek_token())? - }; - - values.push(value); - if self.consume_token(&Token::Comma) { - continue; - } - - if parenthesized_assignment { - self.expect_token(&Token::RParen)?; - } - return Ok(Statement::SetVariable { - local: modifier == Some(Keyword::LOCAL), - hivevar: Some(Keyword::HIVEVAR) == modifier, - variables, - value: values, - }); - } - } - - let OneOrManyWithParens::One(variable) = variables else { - return self.expected("set variable", self.peek_token()); - }; - - if variable.to_string().eq_ignore_ascii_case("TIMEZONE") { - // for some db (e.g. 
postgresql), SET TIME ZONE is an alias for SET TIMEZONE [TO|=] - match self.parse_expr() { - Ok(expr) => Ok(Statement::SetTimeZone { - local: modifier == Some(Keyword::LOCAL), - value: expr, - }), - _ => self.expected("timezone value", self.peek_token())?, - } - } else if variable.to_string() == "CHARACTERISTICS" { - self.expect_keywords(&[Keyword::AS, Keyword::TRANSACTION])?; - Ok(Statement::SetTransaction { - modes: self.parse_transaction_modes()?, - snapshot: None, - session: true, - }) - } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { - if self.parse_keyword(Keyword::SNAPSHOT) { - let snapshot_id = self.parse_value()?; - return Ok(Statement::SetTransaction { - modes: vec![], - snapshot: Some(snapshot_id), - session: false, - }); - } - Ok(Statement::SetTransaction { - modes: self.parse_transaction_modes()?, - snapshot: None, - session: false, - }) - } else { - self.expected("equals sign or TO", self.peek_token()) - } - } - - pub fn parse_show(&mut self) -> Result { - let terse = self.parse_keyword(Keyword::TERSE); - let extended = self.parse_keyword(Keyword::EXTENDED); - let full = self.parse_keyword(Keyword::FULL); - let session = self.parse_keyword(Keyword::SESSION); - let global = self.parse_keyword(Keyword::GLOBAL); - let external = self.parse_keyword(Keyword::EXTERNAL); - if self - .parse_one_of_keywords(&[Keyword::COLUMNS, Keyword::FIELDS]) - .is_some() - { - Ok(self.parse_show_columns(extended, full)?) - } else if self.parse_keyword(Keyword::TABLES) { - Ok(self.parse_show_tables(terse, extended, full, external)?) - } else if self.parse_keywords(&[Keyword::MATERIALIZED, Keyword::VIEWS]) { - Ok(self.parse_show_views(terse, true)?) - } else if self.parse_keyword(Keyword::VIEWS) { - Ok(self.parse_show_views(terse, false)?) - } else if self.parse_keyword(Keyword::FUNCTIONS) { - Ok(self.parse_show_functions()?) - } else if extended || full { - Err(ParserError::ParserError( - "EXTENDED/FULL are not supported with this type of SHOW query".to_string(), - )) - } else if self.parse_one_of_keywords(&[Keyword::CREATE]).is_some() { - Ok(self.parse_show_create()?) - } else if self.parse_keyword(Keyword::COLLATION) { - Ok(self.parse_show_collation()?) 
- } else if self.parse_keyword(Keyword::VARIABLES) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - Ok(Statement::ShowVariables { - filter: self.parse_show_statement_filter()?, - session, - global, - }) - } else if self.parse_keyword(Keyword::STATUS) - && dialect_of!(self is MySqlDialect | GenericDialect) - { - Ok(Statement::ShowStatus { - filter: self.parse_show_statement_filter()?, - session, - global, - }) - } else if self.parse_keyword(Keyword::DATABASES) { - self.parse_show_databases(terse) - } else if self.parse_keyword(Keyword::SCHEMAS) { - self.parse_show_schemas(terse) - } else { - Ok(Statement::ShowVariable { - variable: self.parse_identifiers()?, - }) - } - } - - fn parse_show_databases(&mut self, terse: bool) -> Result { - let history = self.parse_keyword(Keyword::HISTORY); - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowDatabases { - terse, - history, - show_options, - }) - } - - fn parse_show_schemas(&mut self, terse: bool) -> Result { - let history = self.parse_keyword(Keyword::HISTORY); - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowSchemas { - terse, - history, - show_options, - }) - } - - pub fn parse_show_create(&mut self) -> Result { - let obj_type = match self.expect_one_of_keywords(&[ - Keyword::TABLE, - Keyword::TRIGGER, - Keyword::FUNCTION, - Keyword::PROCEDURE, - Keyword::EVENT, - Keyword::VIEW, - ])? { - Keyword::TABLE => Ok(ShowCreateObject::Table), - Keyword::TRIGGER => Ok(ShowCreateObject::Trigger), - Keyword::FUNCTION => Ok(ShowCreateObject::Function), - Keyword::PROCEDURE => Ok(ShowCreateObject::Procedure), - Keyword::EVENT => Ok(ShowCreateObject::Event), - Keyword::VIEW => Ok(ShowCreateObject::View), - keyword => Err(ParserError::ParserError(format!( - "Unable to map keyword to ShowCreateObject: {keyword:?}" - ))), - }?; - - let obj_name = self.parse_object_name(false)?; - - Ok(Statement::ShowCreate { obj_type, obj_name }) - } - - pub fn parse_show_columns( - &mut self, - extended: bool, - full: bool, - ) -> Result { - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowColumns { - extended, - full, - show_options, - }) - } - - fn parse_show_tables( - &mut self, - terse: bool, - extended: bool, - full: bool, - external: bool, - ) -> Result { - let history = !external && self.parse_keyword(Keyword::HISTORY); - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowTables { - terse, - history, - extended, - full, - external, - show_options, - }) - } - - fn parse_show_views( - &mut self, - terse: bool, - materialized: bool, - ) -> Result { - let show_options = self.parse_show_stmt_options()?; - Ok(Statement::ShowViews { - materialized, - terse, - show_options, - }) - } - - pub fn parse_show_functions(&mut self) -> Result { - let filter = self.parse_show_statement_filter()?; - Ok(Statement::ShowFunctions { filter }) - } - - pub fn parse_show_collation(&mut self) -> Result { - let filter = self.parse_show_statement_filter()?; - Ok(Statement::ShowCollation { filter }) - } - - pub fn parse_show_statement_filter( - &mut self, - ) -> Result, ParserError> { - if self.parse_keyword(Keyword::LIKE) { - Ok(Some(ShowStatementFilter::Like( - self.parse_literal_string()?, - ))) - } else if self.parse_keyword(Keyword::ILIKE) { - Ok(Some(ShowStatementFilter::ILike( - self.parse_literal_string()?, - ))) - } else if self.parse_keyword(Keyword::WHERE) { - Ok(Some(ShowStatementFilter::Where(self.parse_expr()?))) - } else { - self.maybe_parse(|parser| -> Result { - 
parser.parse_literal_string() - })? - .map_or(Ok(None), |filter| { - Ok(Some(ShowStatementFilter::NoKeyword(filter))) - }) - } - } - - pub fn parse_use(&mut self) -> Result { - // Determine which keywords are recognized by the current dialect - let parsed_keyword = if dialect_of!(self is HiveDialect) { - // HiveDialect accepts USE DEFAULT; statement without any db specified - if self.parse_keyword(Keyword::DEFAULT) { - return Ok(Statement::Use(Use::Default)); - } - None // HiveDialect doesn't expect any other specific keyword after `USE` - } else if dialect_of!(self is DatabricksDialect) { - self.parse_one_of_keywords(&[Keyword::CATALOG, Keyword::DATABASE, Keyword::SCHEMA]) - } else if dialect_of!(self is SnowflakeDialect) { - self.parse_one_of_keywords(&[ - Keyword::DATABASE, - Keyword::SCHEMA, - Keyword::WAREHOUSE, - Keyword::ROLE, - Keyword::SECONDARY, - ]) - } else { - None // No specific keywords for other dialects, including GenericDialect - }; - - let result = if matches!(parsed_keyword, Some(Keyword::SECONDARY)) { - self.parse_secondary_roles()? - } else { - let obj_name = self.parse_object_name(false)?; - match parsed_keyword { - Some(Keyword::CATALOG) => Use::Catalog(obj_name), - Some(Keyword::DATABASE) => Use::Database(obj_name), - Some(Keyword::SCHEMA) => Use::Schema(obj_name), - Some(Keyword::WAREHOUSE) => Use::Warehouse(obj_name), - Some(Keyword::ROLE) => Use::Role(obj_name), - _ => Use::Object(obj_name), - } - }; - - Ok(Statement::Use(result)) - } - - fn parse_secondary_roles(&mut self) -> Result { - self.expect_keyword(Keyword::ROLES)?; - if self.parse_keyword(Keyword::NONE) { - Ok(Use::SecondaryRoles(SecondaryRoles::None)) - } else if self.parse_keyword(Keyword::ALL) { - Ok(Use::SecondaryRoles(SecondaryRoles::All)) - } else { - let roles = self.parse_comma_separated(|parser| parser.parse_identifier(false))?; - Ok(Use::SecondaryRoles(SecondaryRoles::List(roles))) - } - } - - pub fn parse_table_and_joins(&mut self) -> Result { - let relation = self.parse_table_factor()?; - // Note that for keywords to be properly handled here, they need to be - // added to `RESERVED_FOR_TABLE_ALIAS`, otherwise they may be parsed as - // a table alias. - let mut joins = vec![]; - loop { - let global = self.parse_keyword(Keyword::GLOBAL); - let join = if self.parse_keyword(Keyword::CROSS) { - let join_operator = if self.parse_keyword(Keyword::JOIN) { - JoinOperator::CrossJoin - } else if self.parse_keyword(Keyword::APPLY) { - // MSSQL extension, similar to CROSS JOIN LATERAL - JoinOperator::CrossApply - } else { - return self.expected("JOIN or APPLY after CROSS", self.peek_token()); - }; - Join { - relation: self.parse_table_factor()?, - global, - join_operator, - } - } else if self.parse_keyword(Keyword::OUTER) { - // MSSQL extension, similar to LEFT JOIN LATERAL .. 
ON 1=1 - self.expect_keyword(Keyword::APPLY)?; - Join { - relation: self.parse_table_factor()?, - global, - join_operator: JoinOperator::OuterApply, - } - } else if self.parse_keyword(Keyword::ASOF) { - self.expect_keyword(Keyword::JOIN)?; - let relation = self.parse_table_factor()?; - self.expect_keyword(Keyword::MATCH_CONDITION)?; - let match_condition = self.parse_parenthesized(Self::parse_expr)?; - Join { - relation, - global, - join_operator: JoinOperator::AsOf { - match_condition, - constraint: self.parse_join_constraint(false)?, - }, - } - } else { - let natural = self.parse_keyword(Keyword::NATURAL); - let peek_keyword = if let Token::Word(w) = self.peek_token().token { - w.keyword - } else { - Keyword::NoKeyword - }; - - let join_operator_type = match peek_keyword { - Keyword::INNER | Keyword::JOIN => { - let _ = self.parse_keyword(Keyword::INNER); // [ INNER ] - self.expect_keyword(Keyword::JOIN)?; - JoinOperator::Inner - } - kw @ Keyword::LEFT | kw @ Keyword::RIGHT => { - let _ = self.next_token(); // consume LEFT/RIGHT - let is_left = kw == Keyword::LEFT; - let join_type = self.parse_one_of_keywords(&[ - Keyword::OUTER, - Keyword::SEMI, - Keyword::ANTI, - Keyword::JOIN, - ]); - match join_type { - Some(Keyword::OUTER) => { - self.expect_keyword(Keyword::JOIN)?; - if is_left { - JoinOperator::LeftOuter - } else { - JoinOperator::RightOuter - } - } - Some(Keyword::SEMI) => { - self.expect_keyword(Keyword::JOIN)?; - if is_left { - JoinOperator::LeftSemi - } else { - JoinOperator::RightSemi - } - } - Some(Keyword::ANTI) => { - self.expect_keyword(Keyword::JOIN)?; - if is_left { - JoinOperator::LeftAnti - } else { - JoinOperator::RightAnti - } - } - Some(Keyword::JOIN) => { - if is_left { - JoinOperator::LeftOuter - } else { - JoinOperator::RightOuter - } - } - _ => { - return Err(ParserError::ParserError(format!( - "expected OUTER, SEMI, ANTI or JOIN after {kw:?}" - ))) - } - } - } - Keyword::ANTI => { - let _ = self.next_token(); // consume ANTI - self.expect_keyword(Keyword::JOIN)?; - JoinOperator::Anti - } - Keyword::SEMI => { - let _ = self.next_token(); // consume SEMI - self.expect_keyword(Keyword::JOIN)?; - JoinOperator::Semi - } - Keyword::FULL => { - let _ = self.next_token(); // consume FULL - let _ = self.parse_keyword(Keyword::OUTER); // [ OUTER ] - self.expect_keyword(Keyword::JOIN)?; - JoinOperator::FullOuter - } - Keyword::OUTER => { - return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); - } - _ if natural => { - return self.expected("a join type after NATURAL", self.peek_token()); - } - _ => break, - }; - let relation = self.parse_table_factor()?; - let join_constraint = self.parse_join_constraint(natural)?; - Join { - relation, - global, - join_operator: join_operator_type(join_constraint), - } - }; - joins.push(join); - } - Ok(TableWithJoins { relation, joins }) - } - - /// A table name or a parenthesized subquery, followed by optional `[AS] alias` - pub fn parse_table_factor(&mut self) -> Result { - if self.parse_keyword(Keyword::LATERAL) { - // LATERAL must always be followed by a subquery or table function. 
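The comment above is the contract the code below enforces: after `LATERAL` the parser accepts either a parenthesized subquery or a table-function call. A sketch of both shapes, with hypothetical table and function names:

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // LATERAL followed by a parenthesized subquery...
    let subquery = "SELECT * FROM t, LATERAL (SELECT * FROM u WHERE u.t_id = t.id) AS l";
    // ...or LATERAL followed by a table-function call.
    let function = "SELECT * FROM t, LATERAL generate_series(1, t.n) AS g";
    for sql in [subquery, function] {
        Parser::parse_sql(&PostgreSqlDialect {}, sql).expect("LATERAL form parses");
    }
}
```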
- if self.consume_token(&Token::LParen) { - self.parse_derived_table_factor(Lateral) - } else { - let name = self.parse_object_name(false)?; - self.expect_token(&Token::LParen)?; - let args = self.parse_optional_args()?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::Function { - lateral: true, - name, - args, - alias, - }) - } - } else if self.parse_keyword(Keyword::TABLE) { - // parse table function (SELECT * FROM TABLE () [ AS ]) - self.expect_token(&Token::LParen)?; - let expr = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::TableFunction { expr, alias }) - } else if self.consume_token(&Token::LParen) { - // A left paren introduces either a derived table (i.e., a subquery) - // or a nested join. It's nearly impossible to determine ahead of - // time which it is... so we just try to parse both. - // - // Here's an example that demonstrates the complexity: - // /-------------------------------------------------------\ - // | /-----------------------------------\ | - // SELECT * FROM ( ( ( (SELECT 1) UNION (SELECT 2) ) AS t1 NATURAL JOIN t2 ) ) - // ^ ^ ^ ^ - // | | | | - // | | | | - // | | | (4) belongs to a SetExpr::Query inside the subquery - // | | (3) starts a derived table (subquery) - // | (2) starts a nested join - // (1) an additional set of parens around a nested join - // - - // If the recently consumed '(' starts a derived table, the call to - // `parse_derived_table_factor` below will return success after parsing the - // subquery, followed by the closing ')', and the alias of the derived table. - // In the example above this is case (3). - if let Some(mut table) = - self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral))? - { - while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) - { - table = match kw { - Keyword::PIVOT => self.parse_pivot_table_factor(table)?, - Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), - } - } - return Ok(table); - } - - // A parsing error from `parse_derived_table_factor` indicates that the '(' we've - // recently consumed does not start a derived table (cases 1, 2, or 4). - // `maybe_parse` will ignore such an error and rewind to be after the opening '('. - - // Inside the parentheses we expect to find an (A) table factor - // followed by some joins or (B) another level of nesting. - let mut table_and_joins = self.parse_table_and_joins()?; - - #[allow(clippy::if_same_then_else)] - if !table_and_joins.joins.is_empty() { - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::NestedJoin { - table_with_joins: Box::new(table_and_joins), - alias, - }) // (A) - } else if let TableFactor::NestedJoin { - table_with_joins: _, - alias: _, - } = &table_and_joins.relation - { - // (B): `table_and_joins` (what we found inside the parentheses) - // is a nested join `(foo JOIN bar)`, not followed by other joins. 
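The worked example from the comment above parses end to end: the derived-table attempt runs first under `maybe_parse`, and when it fails the parser rewinds to just past the `(` and tries a nested join instead. A sketch using that same query:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    // The comment's example: a parenthesized set query aliased as t1,
    // NATURAL JOINed to t2, all wrapped in an extra set of parens (case 1).
    let sql = "SELECT * FROM ( ( ( (SELECT 1) UNION (SELECT 2) ) AS t1 NATURAL JOIN t2 ) )";
    let stmts = Parser::parse_sql(&GenericDialect {}, sql).expect("nested join parses");
    println!("{}", stmts[0]);
}
```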
- self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::NestedJoin { - table_with_joins: Box::new(table_and_joins), - alias, - }) - } else if dialect_of!(self is SnowflakeDialect | GenericDialect) { - // Dialect-specific behavior: Snowflake diverges from the - // standard and from most of the other implementations by - // allowing extra parentheses not only around a join (B), but - // around lone table names (e.g. `FROM (mytable [AS alias])`) - // and around derived tables (e.g. `FROM ((SELECT ...) - // [AS alias])`) as well. - self.expect_token(&Token::RParen)?; - - if let Some(outer_alias) = - self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)? - { - // Snowflake also allows specifying an alias *after* parens - // e.g. `FROM (mytable) AS alias` - match &mut table_and_joins.relation { - TableFactor::Derived { alias, .. } - | TableFactor::Table { alias, .. } - | TableFactor::Function { alias, .. } - | TableFactor::UNNEST { alias, .. } - | TableFactor::JsonTable { alias, .. } - | TableFactor::OpenJsonTable { alias, .. } - | TableFactor::TableFunction { alias, .. } - | TableFactor::Pivot { alias, .. } - | TableFactor::Unpivot { alias, .. } - | TableFactor::MatchRecognize { alias, .. } - | TableFactor::NestedJoin { alias, .. } => { - // but not `FROM (mytable AS alias1) AS alias2`. - if let Some(inner_alias) = alias { - return Err(ParserError::ParserError(format!( - "duplicate alias {inner_alias}" - ))); - } - // Act as if the alias was specified normally next - // to the table name: `(mytable) AS alias` -> - // `(mytable AS alias)` - alias.replace(outer_alias); - } - }; - } - // Do not store the extra set of parens in the AST - Ok(table_and_joins.relation) - } else { - // The SQL spec prohibits derived tables and bare tables from - // appearing alone in parentheses (e.g. `FROM (mytable)`) - self.expected("joined table", self.peek_token()) - } - } else if dialect_of!(self is SnowflakeDialect | DatabricksDialect | GenericDialect) - && matches!( - self.peek_tokens(), - [ - Token::Word(Word { - keyword: Keyword::VALUES, - .. - }), - Token::LParen - ] - ) - { - self.expect_keyword(Keyword::VALUES)?; - - // Snowflake and Databricks allow syntax like below: - // SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2) - // where there are no parentheses around the VALUES clause. 
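The two-token peek (`VALUES` followed by `(`) is what distinguishes this clause from a table that merely happens to be named `VALUES`. The SQL from the comment above, as a sketch:

```rust
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::Parser;

fn main() {
    // Unparenthesized VALUES as a FROM source, with a table alias and
    // column aliases (the Snowflake/Databricks form).
    let sql = "SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2)";
    Parser::parse_sql(&SnowflakeDialect {}, sql).expect("FROM VALUES parses");
}
```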
- let values = SetExpr::Values(self.parse_values(false)?); - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::Derived { - lateral: false, - subquery: Box::new(Query { - with: None, - body: Box::new(values), - order_by: None, - limit: None, - limit_by: vec![], - offset: None, - fetch: None, - locks: vec![], - for_clause: None, - settings: None, - format_clause: None, - }), - alias, - }) - } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) - && self.parse_keyword(Keyword::UNNEST) - { - self.expect_token(&Token::LParen)?; - let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - - let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); - let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { - Ok(Some(alias)) => Some(alias), - Ok(None) => None, - Err(e) => return Err(e), - }; - - let with_offset = match self.expect_keywords(&[Keyword::WITH, Keyword::OFFSET]) { - Ok(()) => true, - Err(_) => false, - }; - - let with_offset_alias = if with_offset { - match self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) { - Ok(Some(alias)) => Some(alias), - Ok(None) => None, - Err(e) => return Err(e), - } - } else { - None - }; - - Ok(TableFactor::UNNEST { - alias, - array_exprs, - with_offset, - with_offset_alias, - with_ordinality, - }) - } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { - let json_expr = self.parse_expr()?; - self.expect_token(&Token::Comma)?; - let json_path = self.parse_value()?; - self.expect_keyword(Keyword::COLUMNS)?; - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?; - self.expect_token(&Token::RParen)?; - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::JsonTable { - json_expr, - json_path, - columns, - alias, - }) - } else if self.parse_keyword_with_tokens(Keyword::OPENJSON, &[Token::LParen]) { - self.prev_token(); - self.parse_open_json_table_factor() - } else { - let name = self.parse_object_name(true)?; - - let json_path = match self.peek_token().token { - Token::LBracket if self.dialect.supports_partiql() => Some(self.parse_json_path()?), - _ => None, - }; - - let partitions: Vec = if dialect_of!(self is MySqlDialect | GenericDialect) - && self.parse_keyword(Keyword::PARTITION) - { - self.parse_parenthesized_identifiers()? - } else { - vec![] - }; - - // Parse potential version qualifier - let version = self.parse_table_version()?; - - // Postgres, MSSQL, ClickHouse: table-valued functions: - let args = if self.consume_token(&Token::LParen) { - Some(self.parse_table_function_args()?) 
- } else { - None - }; - - let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); - - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - - // MSSQL-specific table hints: - let mut with_hints = vec![]; - if self.parse_keyword(Keyword::WITH) { - if self.consume_token(&Token::LParen) { - with_hints = self.parse_comma_separated(Parser::parse_expr)?; - self.expect_token(&Token::RParen)?; - } else { - // rewind, as WITH may belong to the next statement's CTE - self.prev_token(); - } - }; - - let mut table = TableFactor::Table { - name, - alias, - args, - with_hints, - version, - partitions, - with_ordinality, - json_path, - }; - - while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { - table = match kw { - Keyword::PIVOT => self.parse_pivot_table_factor(table)?, - Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), - } - } - - if self.dialect.supports_match_recognize() - && self.parse_keyword(Keyword::MATCH_RECOGNIZE) - { - table = self.parse_match_recognize(table)?; - } - - Ok(table) - } - } - - /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, - /// assuming the `OPENJSON` keyword was already consumed. - fn parse_open_json_table_factor(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let json_expr = self.parse_expr()?; - let json_path = if self.consume_token(&Token::Comma) { - Some(self.parse_value()?) - } else { - None - }; - self.expect_token(&Token::RParen)?; - let columns = if self.parse_keyword(Keyword::WITH) { - self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_openjson_table_column_def)?; - self.expect_token(&Token::RParen)?; - columns - } else { - Vec::new() - }; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::OpenJsonTable { - json_expr, - json_path, - columns, - alias, - }) - } - - fn parse_match_recognize(&mut self, table: TableFactor) -> Result { - self.expect_token(&Token::LParen)?; - - let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; - - let measures = if self.parse_keyword(Keyword::MEASURES) { - self.parse_comma_separated(|p| { - let expr = p.parse_expr()?; - let _ = p.parse_keyword(Keyword::AS); - let alias = p.parse_identifier(false)?; - Ok(Measure { expr, alias }) - })? 
- } else { - vec![] - }; - - let rows_per_match = - if self.parse_keywords(&[Keyword::ONE, Keyword::ROW, Keyword::PER, Keyword::MATCH]) { - Some(RowsPerMatch::OneRow) - } else if self.parse_keywords(&[ - Keyword::ALL, - Keyword::ROWS, - Keyword::PER, - Keyword::MATCH, - ]) { - Some(RowsPerMatch::AllRows( - if self.parse_keywords(&[Keyword::SHOW, Keyword::EMPTY, Keyword::MATCHES]) { - Some(EmptyMatchesMode::Show) - } else if self.parse_keywords(&[ - Keyword::OMIT, - Keyword::EMPTY, - Keyword::MATCHES, - ]) { - Some(EmptyMatchesMode::Omit) - } else if self.parse_keywords(&[ - Keyword::WITH, - Keyword::UNMATCHED, - Keyword::ROWS, - ]) { - Some(EmptyMatchesMode::WithUnmatched) - } else { - None - }, - )) - } else { - None - }; - - let after_match_skip = - if self.parse_keywords(&[Keyword::AFTER, Keyword::MATCH, Keyword::SKIP]) { - if self.parse_keywords(&[Keyword::PAST, Keyword::LAST, Keyword::ROW]) { - Some(AfterMatchSkip::PastLastRow) - } else if self.parse_keywords(&[Keyword::TO, Keyword::NEXT, Keyword::ROW]) { - Some(AfterMatchSkip::ToNextRow) - } else if self.parse_keywords(&[Keyword::TO, Keyword::FIRST]) { - Some(AfterMatchSkip::ToFirst(self.parse_identifier(false)?)) - } else if self.parse_keywords(&[Keyword::TO, Keyword::LAST]) { - Some(AfterMatchSkip::ToLast(self.parse_identifier(false)?)) - } else { - let found = self.next_token(); - return self.expected("after match skip option", found); - } - } else { - None - }; - - self.expect_keyword(Keyword::PATTERN)?; - let pattern = self.parse_parenthesized(Self::parse_pattern)?; - - self.expect_keyword(Keyword::DEFINE)?; - - let symbols = self.parse_comma_separated(|p| { - let symbol = p.parse_identifier(false)?; - p.expect_keyword(Keyword::AS)?; - let definition = p.parse_expr()?; - Ok(SymbolDefinition { symbol, definition }) - })?; - - self.expect_token(&Token::RParen)?; - - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - - Ok(TableFactor::MatchRecognize { - table: Box::new(table), - partition_by, - order_by, - measures, - rows_per_match, - after_match_skip, - pattern, - symbols, - alias, - }) - } - - fn parse_base_pattern(&mut self) -> Result { - match self.next_token().token { - Token::Caret => Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::Start)), - Token::Placeholder(s) if s == "$" => { - Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::End)) - } - Token::LBrace => { - self.expect_token(&Token::Minus)?; - let symbol = self - .parse_identifier(false) - .map(MatchRecognizeSymbol::Named)?; - self.expect_token(&Token::Minus)?; - self.expect_token(&Token::RBrace)?; - Ok(MatchRecognizePattern::Exclude(symbol)) - } - Token::Word(Word { - value, - quote_style: None, - .. 
- }) if value == "PERMUTE" => { - self.expect_token(&Token::LParen)?; - let symbols = self.parse_comma_separated(|p| { - p.parse_identifier(false).map(MatchRecognizeSymbol::Named) - })?; - self.expect_token(&Token::RParen)?; - Ok(MatchRecognizePattern::Permute(symbols)) - } - Token::LParen => { - let pattern = self.parse_pattern()?; - self.expect_token(&Token::RParen)?; - Ok(MatchRecognizePattern::Group(Box::new(pattern))) - } - _ => { - self.prev_token(); - self.parse_identifier(false) - .map(MatchRecognizeSymbol::Named) - .map(MatchRecognizePattern::Symbol) - } - } - } - - fn parse_repetition_pattern(&mut self) -> Result { - let mut pattern = self.parse_base_pattern()?; - loop { - let token = self.next_token(); - let quantifier = match token.token { - Token::Mul => RepetitionQuantifier::ZeroOrMore, - Token::Plus => RepetitionQuantifier::OneOrMore, - Token::Placeholder(s) if s == "?" => RepetitionQuantifier::AtMostOne, - Token::LBrace => { - // quantifier is a range like {n} or {n,} or {,m} or {n,m} - let token = self.next_token(); - match token.token { - Token::Comma => { - let next_token = self.next_token(); - let Token::Number(n, _) = next_token.token else { - return self.expected("literal number", next_token); - }; - self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::AtMost(Self::parse(n, token.span.start)?) - } - Token::Number(n, _) if self.consume_token(&Token::Comma) => { - let next_token = self.next_token(); - match next_token.token { - Token::Number(m, _) => { - self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::Range( - Self::parse(n, token.span.start)?, - Self::parse(m, token.span.start)?, - ) - } - Token::RBrace => { - RepetitionQuantifier::AtLeast(Self::parse(n, token.span.start)?) - } - _ => { - return self.expected("} or upper bound", next_token); - } - } - } - Token::Number(n, _) => { - self.expect_token(&Token::RBrace)?; - RepetitionQuantifier::Exactly(Self::parse(n, token.span.start)?) - } - _ => return self.expected("quantifier range", token), - } - } - _ => { - self.prev_token(); - break; - } - }; - pattern = MatchRecognizePattern::Repetition(Box::new(pattern), quantifier); - } - Ok(pattern) - } - - fn parse_concat_pattern(&mut self) -> Result { - let mut patterns = vec![self.parse_repetition_pattern()?]; - while !matches!(self.peek_token().token, Token::RParen | Token::Pipe) { - patterns.push(self.parse_repetition_pattern()?); - } - match <[MatchRecognizePattern; 1]>::try_from(patterns) { - Ok([pattern]) => Ok(pattern), - Err(patterns) => Ok(MatchRecognizePattern::Concat(patterns)), - } - } - - fn parse_pattern(&mut self) -> Result { - let pattern = self.parse_concat_pattern()?; - if self.consume_token(&Token::Pipe) { - match self.parse_pattern()? { - // flatten nested alternations - MatchRecognizePattern::Alternation(mut patterns) => { - patterns.insert(0, pattern); - Ok(MatchRecognizePattern::Alternation(patterns)) - } - next => Ok(MatchRecognizePattern::Alternation(vec![pattern, next])), - } - } else { - Ok(pattern) - } - } - - /// Parse a given table version specifier. - /// - /// For now it only supports timestamp versioning for BigQuery and MSSQL dialects. - pub fn parse_table_version(&mut self) -> Result, ParserError> { - if dialect_of!(self is BigQueryDialect | MsSqlDialect) - && self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) - { - let expr = self.parse_expr()?; - Ok(Some(TableVersion::ForSystemTimeAsOf(expr))) - } else { - Ok(None) - } - } - - /// Parses MySQL's JSON_TABLE column definition. 
- /// For example: `id INT EXISTS PATH '$' DEFAULT '0' ON EMPTY ERROR ON ERROR` - pub fn parse_json_table_column_def(&mut self) -> Result { - if self.parse_keyword(Keyword::NESTED) { - let _has_path_keyword = self.parse_keyword(Keyword::PATH); - let path = self.parse_value()?; - self.expect_keyword(Keyword::COLUMNS)?; - let columns = self.parse_parenthesized(|p| { - p.parse_comma_separated(Self::parse_json_table_column_def) - })?; - return Ok(JsonTableColumn::Nested(JsonTableNestedColumn { - path, - columns, - })); - } - let name = self.parse_identifier(false)?; - if self.parse_keyword(Keyword::FOR) { - self.expect_keyword(Keyword::ORDINALITY)?; - return Ok(JsonTableColumn::ForOrdinality(name)); - } - let r#type = self.parse_data_type()?; - let exists = self.parse_keyword(Keyword::EXISTS); - self.expect_keyword(Keyword::PATH)?; - let path = self.parse_value()?; - let mut on_empty = None; - let mut on_error = None; - while let Some(error_handling) = self.parse_json_table_column_error_handling()? { - if self.parse_keyword(Keyword::EMPTY) { - on_empty = Some(error_handling); - } else { - self.expect_keyword(Keyword::ERROR)?; - on_error = Some(error_handling); - } - } - Ok(JsonTableColumn::Named(JsonTableNamedColumn { - name, - r#type, - path, - exists, - on_empty, - on_error, - })) - } - - /// Parses MSSQL's `OPENJSON WITH` column definition. - /// - /// ```sql - /// colName type [ column_path ] [ AS JSON ] - /// ``` - /// - /// Reference: - pub fn parse_openjson_table_column_def(&mut self) -> Result { - let name = self.parse_identifier(false)?; - let r#type = self.parse_data_type()?; - let path = if let Token::SingleQuotedString(path) = self.peek_token().token { - self.next_token(); - Some(path) - } else { - None - }; - let as_json = self.parse_keyword(Keyword::AS); - if as_json { - self.expect_keyword(Keyword::JSON)?; - } - Ok(OpenJsonTableColumn { - name, - r#type, - path, - as_json, - }) - } - - fn parse_json_table_column_error_handling( - &mut self, - ) -> Result, ParserError> { - let res = if self.parse_keyword(Keyword::NULL) { - JsonTableColumnErrorHandling::Null - } else if self.parse_keyword(Keyword::ERROR) { - JsonTableColumnErrorHandling::Error - } else if self.parse_keyword(Keyword::DEFAULT) { - JsonTableColumnErrorHandling::Default(self.parse_value()?) - } else { - return Ok(None); - }; - self.expect_keyword(Keyword::ON)?; - Ok(Some(res)) - } - - pub fn parse_derived_table_factor( - &mut self, - lateral: IsLateral, - ) -> Result { - let subquery = self.parse_query()?; - self.expect_token(&Token::RParen)?; - let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; - Ok(TableFactor::Derived { - lateral: match lateral { - Lateral => true, - NotLateral => false, - }, - subquery, - alias, - }) - } - - fn parse_aliased_function_call(&mut self) -> Result { - let function_name = match self.next_token().token { - Token::Word(w) => Ok(w.value), - _ => self.expected("a function identifier", self.peek_token()), - }?; - let expr = self.parse_function(ObjectName(vec![Ident::new(function_name)]))?; - let alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier(false)?) 
-        } else {
-            None
-        };
-
-        Ok(ExprWithAlias { expr, alias })
-    }
-    /// Parses an expression with an optional alias
-    ///
-    /// Examples:
-    ///
-    /// ```sql
-    /// SUM(price) AS total_price
-    /// ```
-    /// ```sql
-    /// SUM(price)
-    /// ```
-    ///
-    /// Example
-    /// ```
-    /// # use sqlparser::parser::{Parser, ParserError};
-    /// # use sqlparser::dialect::GenericDialect;
-    /// # fn main() -> Result<(), ParserError> {
-    /// let sql = r#"SUM("a") as "b""#;
-    /// let mut parser = Parser::new(&GenericDialect).try_with_sql(sql)?;
-    /// let expr_with_alias = parser.parse_expr_with_alias()?;
-    /// assert_eq!(Some("b".to_string()), expr_with_alias.alias.map(|x|x.value));
-    /// # Ok(())
-    /// # }
-    /// ```
-    pub fn parse_expr_with_alias(&mut self) -> Result<ExprWithAlias, ParserError> {
-        let expr = self.parse_expr()?;
-        let alias = if self.parse_keyword(Keyword::AS) {
-            Some(self.parse_identifier(false)?)
-        } else {
-            None
-        };
-
-        Ok(ExprWithAlias { expr, alias })
-    }
-
-    pub fn parse_pivot_table_factor(
-        &mut self,
-        table: TableFactor,
-    ) -> Result<TableFactor, ParserError> {
-        self.expect_token(&Token::LParen)?;
-        let aggregate_functions = self.parse_comma_separated(Self::parse_aliased_function_call)?;
-        self.expect_keyword(Keyword::FOR)?;
-        let value_column = self.parse_object_name(false)?.0;
-        self.expect_keyword(Keyword::IN)?;
-
-        self.expect_token(&Token::LParen)?;
-        let value_source = if self.parse_keyword(Keyword::ANY) {
-            let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
-                self.parse_comma_separated(Parser::parse_order_by_expr)?
-            } else {
-                vec![]
-            };
-            PivotValueSource::Any(order_by)
-        } else if self.peek_sub_query() {
-            PivotValueSource::Subquery(self.parse_query()?)
-        } else {
-            PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?)
-        };
-        self.expect_token(&Token::RParen)?;
-
-        let default_on_null =
-            if self.parse_keywords(&[Keyword::DEFAULT, Keyword::ON, Keyword::NULL]) {
-                self.expect_token(&Token::LParen)?;
-                let expr = self.parse_expr()?;
-                self.expect_token(&Token::RParen)?;
-                Some(expr)
-            } else {
-                None
-            };
-
-        self.expect_token(&Token::RParen)?;
-        let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
-        Ok(TableFactor::Pivot {
-            table: Box::new(table),
-            aggregate_functions,
-            value_column,
-            value_source,
-            default_on_null,
-            alias,
-        })
-    }
-
-    pub fn parse_unpivot_table_factor(
-        &mut self,
-        table: TableFactor,
-    ) -> Result<TableFactor, ParserError> {
-        self.expect_token(&Token::LParen)?;
-        let value = self.parse_identifier(false)?;
-        self.expect_keyword(Keyword::FOR)?;
-        let name = self.parse_identifier(false)?;
-        self.expect_keyword(Keyword::IN)?;
-        let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
-        self.expect_token(&Token::RParen)?;
-        let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?;
-        Ok(TableFactor::Unpivot {
-            table: Box::new(table),
-            value,
-            name,
-            columns,
-            alias,
-        })
-    }
-
-    pub fn parse_join_constraint(&mut self, natural: bool) -> Result<JoinConstraint, ParserError> {
-        if natural {
-            Ok(JoinConstraint::Natural)
-        } else if self.parse_keyword(Keyword::ON) {
-            let constraint = self.parse_expr()?;
-            Ok(JoinConstraint::On(constraint))
-        } else if self.parse_keyword(Keyword::USING) {
-            let columns = self.parse_parenthesized_column_list(Mandatory, false)?;
-            Ok(JoinConstraint::Using(columns))
-        } else {
-            Ok(JoinConstraint::None)
-            //self.expected("ON, or USING after JOIN", self.peek_token())
-        }
-    }
-
-    /// Parse a GRANT statement.
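A usage sketch for the GRANT parser that follows: a privilege list, then `ON` and the objects, then `TO` and the grantees, with the optional `WITH GRANT OPTION` tail. Object and grantee names are made up:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    for sql in [
        "GRANT SELECT, INSERT ON mytable TO alice",
        "GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA s TO bob WITH GRANT OPTION",
    ] {
        Parser::parse_sql(&GenericDialect {}, sql).expect("GRANT parses");
    }
}
```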
- pub fn parse_grant(&mut self) -> Result { - let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; - - self.expect_keyword(Keyword::TO)?; - let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?; - - let with_grant_option = - self.parse_keywords(&[Keyword::WITH, Keyword::GRANT, Keyword::OPTION]); - - let granted_by = self - .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) - .then(|| self.parse_identifier(false).unwrap()); - - Ok(Statement::Grant { - privileges, - objects, - grantees, - with_grant_option, - granted_by, - }) - } - - pub fn parse_grant_revoke_privileges_objects( - &mut self, - ) -> Result<(Privileges, GrantObjects), ParserError> { - let privileges = if self.parse_keyword(Keyword::ALL) { - Privileges::All { - with_privileges_keyword: self.parse_keyword(Keyword::PRIVILEGES), - } - } else { - let (actions, err): (Vec<_>, Vec<_>) = self - .parse_actions_list()? - .into_iter() - .map(|(kw, columns)| match kw { - Keyword::DELETE => Ok(Action::Delete), - Keyword::INSERT => Ok(Action::Insert { columns }), - Keyword::REFERENCES => Ok(Action::References { columns }), - Keyword::SELECT => Ok(Action::Select { columns }), - Keyword::TRIGGER => Ok(Action::Trigger), - Keyword::TRUNCATE => Ok(Action::Truncate), - Keyword::UPDATE => Ok(Action::Update { columns }), - Keyword::USAGE => Ok(Action::Usage), - Keyword::CONNECT => Ok(Action::Connect), - Keyword::CREATE => Ok(Action::Create), - Keyword::EXECUTE => Ok(Action::Execute), - Keyword::TEMPORARY => Ok(Action::Temporary), - // This will cover all future added keywords to - // parse_grant_permission and unhandled in this - // match - _ => Err(kw), - }) - .partition(Result::is_ok); - - if !err.is_empty() { - let errors: Vec = err.into_iter().filter_map(|x| x.err()).collect(); - return Err(ParserError::ParserError(format!( - "INTERNAL ERROR: GRANT/REVOKE unexpected keyword(s) - {errors:?}" - ))); - } - let act = actions.into_iter().filter_map(|x| x.ok()).collect(); - Privileges::Actions(act) - }; - - self.expect_keyword(Keyword::ON)?; - - let objects = if self.parse_keywords(&[ - Keyword::ALL, - Keyword::TABLES, - Keyword::IN, - Keyword::SCHEMA, - ]) { - GrantObjects::AllTablesInSchema { - schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, - } - } else if self.parse_keywords(&[ - Keyword::ALL, - Keyword::SEQUENCES, - Keyword::IN, - Keyword::SCHEMA, - ]) { - GrantObjects::AllSequencesInSchema { - schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?, - } - } else { - let object_type = - self.parse_one_of_keywords(&[Keyword::SEQUENCE, Keyword::SCHEMA, Keyword::TABLE]); - let objects = self.parse_comma_separated(|p| p.parse_object_name(false)); - match object_type { - Some(Keyword::SCHEMA) => GrantObjects::Schemas(objects?), - Some(Keyword::SEQUENCE) => GrantObjects::Sequences(objects?), - Some(Keyword::TABLE) | None => GrantObjects::Tables(objects?), - _ => unreachable!(), - } - }; - - Ok((privileges, objects)) - } - - pub fn parse_grant_permission(&mut self) -> Result { - if let Some(kw) = self.parse_one_of_keywords(&[ - Keyword::CONNECT, - Keyword::CREATE, - Keyword::DELETE, - Keyword::EXECUTE, - Keyword::INSERT, - Keyword::REFERENCES, - Keyword::SELECT, - Keyword::TEMPORARY, - Keyword::TRIGGER, - Keyword::TRUNCATE, - Keyword::UPDATE, - Keyword::USAGE, - ]) { - let columns = match kw { - Keyword::INSERT | Keyword::REFERENCES | Keyword::SELECT | Keyword::UPDATE => { - let columns = self.parse_parenthesized_column_list(Optional, false)?; - if columns.is_empty() { - None - 
} else { - Some(columns) - } - } - _ => None, - }; - Ok((kw, columns)) - } else { - self.expected("a privilege keyword", self.peek_token())? - } - } - - /// Parse a REVOKE statement - pub fn parse_revoke(&mut self) -> Result { - let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; - - self.expect_keyword(Keyword::FROM)?; - let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?; - - let granted_by = self - .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) - .then(|| self.parse_identifier(false).unwrap()); - - let loc = self.peek_token().span.start; - let cascade = self.parse_keyword(Keyword::CASCADE); - let restrict = self.parse_keyword(Keyword::RESTRICT); - if cascade && restrict { - return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc); - } - - Ok(Statement::Revoke { - privileges, - objects, - grantees, - granted_by, - cascade, - }) - } - - /// Parse an REPLACE statement - pub fn parse_replace(&mut self) -> Result { - if !dialect_of!(self is MySqlDialect | GenericDialect) { - return parser_err!( - "Unsupported statement REPLACE", - self.peek_token().span.start - ); - } - - let mut insert = self.parse_insert()?; - if let Statement::Insert(Insert { replace_into, .. }) = &mut insert { - *replace_into = true; - } - - Ok(insert) - } - - /// Parse an INSERT statement, returning a `Box`ed SetExpr - /// - /// This is used to reduce the size of the stack frames in debug builds - fn parse_insert_setexpr_boxed(&mut self) -> Result, ParserError> { - Ok(Box::new(SetExpr::Insert(self.parse_insert()?))) - } - - /// Parse an INSERT statement - pub fn parse_insert(&mut self) -> Result { - let or = self.parse_conflict_clause(); - let priority = if !dialect_of!(self is MySqlDialect | GenericDialect) { - None - } else if self.parse_keyword(Keyword::LOW_PRIORITY) { - Some(MysqlInsertPriority::LowPriority) - } else if self.parse_keyword(Keyword::DELAYED) { - Some(MysqlInsertPriority::Delayed) - } else if self.parse_keyword(Keyword::HIGH_PRIORITY) { - Some(MysqlInsertPriority::HighPriority) - } else { - None - }; - - let ignore = dialect_of!(self is MySqlDialect | GenericDialect) - && self.parse_keyword(Keyword::IGNORE); - - let replace_into = false; - - let action = self.parse_one_of_keywords(&[Keyword::INTO, Keyword::OVERWRITE]); - let into = action == Some(Keyword::INTO); - let overwrite = action == Some(Keyword::OVERWRITE); - - let local = self.parse_keyword(Keyword::LOCAL); - - if self.parse_keyword(Keyword::DIRECTORY) { - let path = self.parse_literal_string()?; - let file_format = if self.parse_keywords(&[Keyword::STORED, Keyword::AS]) { - Some(self.parse_file_format()?) - } else { - None - }; - let source = self.parse_query()?; - Ok(Statement::Directory { - local, - path, - overwrite, - file_format, - source, - }) - } else { - // Hive lets you put table here regardless - let table = self.parse_keyword(Keyword::TABLE); - let table_name = self.parse_object_name(false)?; - - let table_alias = - if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier(false)?) 
- } else { - None - }; - - let is_mysql = dialect_of!(self is MySqlDialect); - - let (columns, partitioned, after_columns, source) = - if self.parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { - (vec![], None, vec![], None) - } else { - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - - let partitioned = self.parse_insert_partition()?; - // Hive allows you to specify columns after partitions as well if you want. - let after_columns = if dialect_of!(self is HiveDialect) { - self.parse_parenthesized_column_list(Optional, false)? - } else { - vec![] - }; - - let source = Some(self.parse_query()?); - - (columns, partitioned, after_columns, source) - }; - - let insert_alias = if dialect_of!(self is MySqlDialect | GenericDialect) - && self.parse_keyword(Keyword::AS) - { - let row_alias = self.parse_object_name(false)?; - let col_aliases = Some(self.parse_parenthesized_column_list(Optional, false)?); - Some(InsertAliases { - row_alias, - col_aliases, - }) - } else { - None - }; - - let on = if self.parse_keyword(Keyword::ON) { - if self.parse_keyword(Keyword::CONFLICT) { - let conflict_target = - if self.parse_keywords(&[Keyword::ON, Keyword::CONSTRAINT]) { - Some(ConflictTarget::OnConstraint(self.parse_object_name(false)?)) - } else if self.peek_token() == Token::LParen { - Some(ConflictTarget::Columns( - self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, - )) - } else { - None - }; - - self.expect_keyword(Keyword::DO)?; - let action = if self.parse_keyword(Keyword::NOTHING) { - OnConflictAction::DoNothing - } else { - self.expect_keyword(Keyword::UPDATE)?; - self.expect_keyword(Keyword::SET)?; - let assignments = self.parse_comma_separated(Parser::parse_assignment)?; - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - OnConflictAction::DoUpdate(DoUpdate { - assignments, - selection, - }) - }; - - Some(OnInsert::OnConflict(OnConflict { - conflict_target, - action, - })) - } else { - self.expect_keyword(Keyword::DUPLICATE)?; - self.expect_keyword(Keyword::KEY)?; - self.expect_keyword(Keyword::UPDATE)?; - let l = self.parse_comma_separated(Parser::parse_assignment)?; - - Some(OnInsert::DuplicateKeyUpdate(l)) - } - } else { - None - }; - - let returning = if self.parse_keyword(Keyword::RETURNING) { - Some(self.parse_comma_separated(Parser::parse_select_item)?) 
- } else { - None - }; - - Ok(Statement::Insert(Insert { - or, - table_name, - table_alias, - ignore, - into, - overwrite, - partitioned, - columns, - after_columns, - source, - table, - on, - returning, - replace_into, - priority, - insert_alias, - })) - } - } - - fn parse_conflict_clause(&mut self) -> Option { - if self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]) { - Some(SqliteOnConflict::Replace) - } else if self.parse_keywords(&[Keyword::OR, Keyword::ROLLBACK]) { - Some(SqliteOnConflict::Rollback) - } else if self.parse_keywords(&[Keyword::OR, Keyword::ABORT]) { - Some(SqliteOnConflict::Abort) - } else if self.parse_keywords(&[Keyword::OR, Keyword::FAIL]) { - Some(SqliteOnConflict::Fail) - } else if self.parse_keywords(&[Keyword::OR, Keyword::IGNORE]) { - Some(SqliteOnConflict::Ignore) - } else if self.parse_keyword(Keyword::REPLACE) { - Some(SqliteOnConflict::Replace) - } else { - None - } - } - - pub fn parse_insert_partition(&mut self) -> Result>, ParserError> { - if self.parse_keyword(Keyword::PARTITION) { - self.expect_token(&Token::LParen)?; - let partition_cols = Some(self.parse_comma_separated(Parser::parse_expr)?); - self.expect_token(&Token::RParen)?; - Ok(partition_cols) - } else { - Ok(None) - } - } - - pub fn parse_load_data_table_format( - &mut self, - ) -> Result, ParserError> { - if self.parse_keyword(Keyword::INPUTFORMAT) { - let input_format = self.parse_expr()?; - self.expect_keyword(Keyword::SERDE)?; - let serde = self.parse_expr()?; - Ok(Some(HiveLoadDataFormat { - input_format, - serde, - })) - } else { - Ok(None) - } - } - - /// Parse an UPDATE statement, returning a `Box`ed SetExpr - /// - /// This is used to reduce the size of the stack frames in debug builds - fn parse_update_setexpr_boxed(&mut self) -> Result, ParserError> { - Ok(Box::new(SetExpr::Update(self.parse_update()?))) - } - - pub fn parse_update(&mut self) -> Result { - let or = self.parse_conflict_clause(); - let table = self.parse_table_and_joins()?; - self.expect_keyword(Keyword::SET)?; - let assignments = self.parse_comma_separated(Parser::parse_assignment)?; - let from = if self.parse_keyword(Keyword::FROM) - && dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect | SQLiteDialect ) - { - Some(self.parse_table_and_joins()?) - } else { - None - }; - let selection = if self.parse_keyword(Keyword::WHERE) { - Some(self.parse_expr()?) - } else { - None - }; - let returning = if self.parse_keyword(Keyword::RETURNING) { - Some(self.parse_comma_separated(Parser::parse_select_item)?) 
- } else { - None - }; - Ok(Statement::Update { - table, - assignments, - from, - selection, - returning, - or, - }) - } - - /// Parse a `var = expr` assignment, used in an UPDATE statement - pub fn parse_assignment(&mut self) -> Result { - let target = self.parse_assignment_target()?; - self.expect_token(&Token::Eq)?; - let value = self.parse_expr()?; - Ok(Assignment { target, value }) - } - - /// Parse the left-hand side of an assignment, used in an UPDATE statement - pub fn parse_assignment_target(&mut self) -> Result { - if self.consume_token(&Token::LParen) { - let columns = self.parse_comma_separated(|p| p.parse_object_name(false))?; - self.expect_token(&Token::RParen)?; - Ok(AssignmentTarget::Tuple(columns)) - } else { - let column = self.parse_object_name(false)?; - Ok(AssignmentTarget::ColumnName(column)) - } - } - - pub fn parse_function_args(&mut self) -> Result { - let arg = if self.dialect.supports_named_fn_args_with_expr_name() { - self.maybe_parse(|p| { - let name = p.parse_expr()?; - let operator = p.parse_function_named_arg_operator()?; - let arg = p.parse_wildcard_expr()?.into(); - Ok(FunctionArg::ExprNamed { - name, - arg, - operator, - }) - })? - } else { - self.maybe_parse(|p| { - let name = p.parse_identifier(false)?; - let operator = p.parse_function_named_arg_operator()?; - let arg = p.parse_wildcard_expr()?.into(); - Ok(FunctionArg::Named { - name, - arg, - operator, - }) - })? - }; - if let Some(arg) = arg { - return Ok(arg); - } - Ok(FunctionArg::Unnamed(self.parse_wildcard_expr()?.into())) - } - - fn parse_function_named_arg_operator(&mut self) -> Result { - if self.parse_keyword(Keyword::VALUE) { - return Ok(FunctionArgOperator::Value); - } - let tok = self.next_token(); - match tok.token { - Token::RArrow if self.dialect.supports_named_fn_args_with_rarrow_operator() => { - Ok(FunctionArgOperator::RightArrow) - } - Token::Eq if self.dialect.supports_named_fn_args_with_eq_operator() => { - Ok(FunctionArgOperator::Equals) - } - Token::Assignment - if self - .dialect - .supports_named_fn_args_with_assignment_operator() => - { - Ok(FunctionArgOperator::Assignment) - } - Token::Colon if self.dialect.supports_named_fn_args_with_colon_operator() => { - Ok(FunctionArgOperator::Colon) - } - _ => { - self.prev_token(); - self.expected("argument operator", tok) - } - } - } - - pub fn parse_optional_args(&mut self) -> Result, ParserError> { - if self.consume_token(&Token::RParen) { - Ok(vec![]) - } else { - let args = self.parse_comma_separated(Parser::parse_function_args)?; - self.expect_token(&Token::RParen)?; - Ok(args) - } - } - - fn parse_table_function_args(&mut self) -> Result { - if self.consume_token(&Token::RParen) { - return Ok(TableFunctionArgs { - args: vec![], - settings: None, - }); - } - let mut args = vec![]; - let settings = loop { - if let Some(settings) = self.parse_settings()? { - break Some(settings); - } - args.push(self.parse_function_args()?); - if self.is_parse_comma_separated_end() { - break None; - } - }; - self.expect_token(&Token::RParen)?; - Ok(TableFunctionArgs { args, settings }) - } - - /// Parses a potentially empty list of arguments to a window function - /// (including the closing parenthesis). - /// - /// Examples: - /// ```sql - /// FIRST_VALUE(x ORDER BY 1,2,3); - /// FIRST_VALUE(x IGNORE NULL); - /// ``` - fn parse_function_argument_list(&mut self) -> Result { - let mut clauses = vec![]; - - // For MSSQL empty argument list with json-null-clause case, e.g. 
`JSON_ARRAY(NULL ON NULL)` - if let Some(null_clause) = self.parse_json_null_clause() { - clauses.push(FunctionArgumentClause::JsonNullClause(null_clause)); - } - - if self.consume_token(&Token::RParen) { - return Ok(FunctionArgumentList { - duplicate_treatment: None, - args: vec![], - clauses, - }); - } - - let duplicate_treatment = self.parse_duplicate_treatment()?; - let args = self.parse_comma_separated(Parser::parse_function_args)?; - - if self.dialect.supports_window_function_null_treatment_arg() { - if let Some(null_treatment) = self.parse_null_treatment()? { - clauses.push(FunctionArgumentClause::IgnoreOrRespectNulls(null_treatment)); - } - } - - if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - clauses.push(FunctionArgumentClause::OrderBy( - self.parse_comma_separated(Parser::parse_order_by_expr)?, - )); - } - - if self.parse_keyword(Keyword::LIMIT) { - clauses.push(FunctionArgumentClause::Limit(self.parse_expr()?)); - } - - if dialect_of!(self is GenericDialect | BigQueryDialect) - && self.parse_keyword(Keyword::HAVING) - { - let kind = match self.expect_one_of_keywords(&[Keyword::MIN, Keyword::MAX])? { - Keyword::MIN => HavingBoundKind::Min, - Keyword::MAX => HavingBoundKind::Max, - _ => unreachable!(), - }; - clauses.push(FunctionArgumentClause::Having(HavingBound( - kind, - self.parse_expr()?, - ))) - } - - if dialect_of!(self is GenericDialect | MySqlDialect) - && self.parse_keyword(Keyword::SEPARATOR) - { - clauses.push(FunctionArgumentClause::Separator(self.parse_value()?)); - } - - if let Some(on_overflow) = self.parse_listagg_on_overflow()? { - clauses.push(FunctionArgumentClause::OnOverflow(on_overflow)); - } - - if let Some(null_clause) = self.parse_json_null_clause() { - clauses.push(FunctionArgumentClause::JsonNullClause(null_clause)); - } - - self.expect_token(&Token::RParen)?; - Ok(FunctionArgumentList { - duplicate_treatment, - args, - clauses, - }) - } - - /// Parses MSSQL's json-null-clause - fn parse_json_null_clause(&mut self) -> Option { - if self.parse_keywords(&[Keyword::ABSENT, Keyword::ON, Keyword::NULL]) { - Some(JsonNullClause::AbsentOnNull) - } else if self.parse_keywords(&[Keyword::NULL, Keyword::ON, Keyword::NULL]) { - Some(JsonNullClause::NullOnNull) - } else { - None - } - } - - fn parse_duplicate_treatment(&mut self) -> Result, ParserError> { - let loc = self.peek_token().span.start; - match ( - self.parse_keyword(Keyword::ALL), - self.parse_keyword(Keyword::DISTINCT), - ) { - (true, false) => Ok(Some(DuplicateTreatment::All)), - (false, true) => Ok(Some(DuplicateTreatment::Distinct)), - (false, false) => Ok(None), - (true, true) => parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc), - } - } - - /// Parse a comma-delimited list of projections after SELECT - pub fn parse_select_item(&mut self) -> Result { - match self.parse_wildcard_expr()? 
{ - Expr::QualifiedWildcard(prefix, token) => Ok(SelectItem::QualifiedWildcard( - prefix, - self.parse_wildcard_additional_options(token.0)?, - )), - Expr::Wildcard(token) => Ok(SelectItem::Wildcard( - self.parse_wildcard_additional_options(token.0)?, - )), - Expr::Identifier(v) if v.value.to_lowercase() == "from" && v.quote_style.is_none() => { - parser_err!( - format!("Expected an expression, found: {}", v), - self.peek_token().span.start - ) - } - Expr::BinaryOp { - left, - op: BinaryOperator::Eq, - right, - } if self.dialect.supports_eq_alias_assignment() - && matches!(left.as_ref(), Expr::Identifier(_)) => - { - let Expr::Identifier(alias) = *left else { - return parser_err!( - "BUG: expected identifier expression as alias", - self.peek_token().span.start - ); - }; - Ok(SelectItem::ExprWithAlias { - expr: *right, - alias, - }) - } - expr => self - .parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) - .map(|alias| match alias { - Some(alias) => SelectItem::ExprWithAlias { expr, alias }, - None => SelectItem::UnnamedExpr(expr), - }), - } - } - - /// Parse an [`WildcardAdditionalOptions`] information for wildcard select items. - /// - /// If it is not possible to parse it, will return an option. - pub fn parse_wildcard_additional_options( - &mut self, - wildcard_token: TokenWithSpan, - ) -> Result { - let opt_ilike = if dialect_of!(self is GenericDialect | SnowflakeDialect) { - self.parse_optional_select_item_ilike()? - } else { - None - }; - let opt_exclude = if opt_ilike.is_none() - && dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect) - { - self.parse_optional_select_item_exclude()? - } else { - None - }; - let opt_except = if self.dialect.supports_select_wildcard_except() { - self.parse_optional_select_item_except()? - } else { - None - }; - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) - { - self.parse_optional_select_item_replace()? - } else { - None - }; - let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { - self.parse_optional_select_item_rename()? - } else { - None - }; - - Ok(WildcardAdditionalOptions { - wildcard_token: wildcard_token.into(), - opt_ilike, - opt_exclude, - opt_except, - opt_rename, - opt_replace, - }) - } - - /// Parse an [`Ilike`](IlikeSelectItem) information for wildcard select items. - /// - /// If it is not possible to parse it, will return an option. - pub fn parse_optional_select_item_ilike( - &mut self, - ) -> Result, ParserError> { - let opt_ilike = if self.parse_keyword(Keyword::ILIKE) { - let next_token = self.next_token(); - let pattern = match next_token.token { - Token::SingleQuotedString(s) => s, - _ => return self.expected("ilike pattern", next_token), - }; - Some(IlikeSelectItem { pattern }) - } else { - None - }; - Ok(opt_ilike) - } - - /// Parse an [`Exclude`](ExcludeSelectItem) information for wildcard select items. - /// - /// If it is not possible to parse it, will return an option. 
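The EXCLUDE parser below accepts either a single identifier or a parenthesized list. A sketch of both forms under SnowflakeDialect, where the dispatcher above enables this option (column names are made up):

```rust
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::Parser;

fn main() {
    // Wildcard EXCLUDE: a single column, or a parenthesized column list.
    for sql in [
        "SELECT * EXCLUDE department_id FROM employees",
        "SELECT * EXCLUDE (department_id, manager_id) FROM employees",
    ] {
        Parser::parse_sql(&SnowflakeDialect {}, sql).expect("EXCLUDE parses");
    }
}
```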
- pub fn parse_optional_select_item_exclude( - &mut self, - ) -> Result, ParserError> { - let opt_exclude = if self.parse_keyword(Keyword::EXCLUDE) { - if self.consume_token(&Token::LParen) { - let columns = - self.parse_comma_separated(|parser| parser.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - Some(ExcludeSelectItem::Multiple(columns)) - } else { - let column = self.parse_identifier(false)?; - Some(ExcludeSelectItem::Single(column)) - } - } else { - None - }; - - Ok(opt_exclude) - } - - /// Parse an [`Except`](ExceptSelectItem) information for wildcard select items. - /// - /// If it is not possible to parse it, will return an option. - pub fn parse_optional_select_item_except( - &mut self, - ) -> Result, ParserError> { - let opt_except = if self.parse_keyword(Keyword::EXCEPT) { - if self.peek_token().token == Token::LParen { - let idents = self.parse_parenthesized_column_list(Mandatory, false)?; - match &idents[..] { - [] => { - return self.expected( - "at least one column should be parsed by the expect clause", - self.peek_token(), - )?; - } - [first, idents @ ..] => Some(ExceptSelectItem { - first_element: first.clone(), - additional_elements: idents.to_vec(), - }), - } - } else { - // Clickhouse allows EXCEPT column_name - let ident = self.parse_identifier(false)?; - Some(ExceptSelectItem { - first_element: ident, - additional_elements: vec![], - }) - } - } else { - None - }; - - Ok(opt_except) - } - - /// Parse a [`Rename`](RenameSelectItem) information for wildcard select items. - pub fn parse_optional_select_item_rename( - &mut self, - ) -> Result, ParserError> { - let opt_rename = if self.parse_keyword(Keyword::RENAME) { - if self.consume_token(&Token::LParen) { - let idents = - self.parse_comma_separated(|parser| parser.parse_identifier_with_alias())?; - self.expect_token(&Token::RParen)?; - Some(RenameSelectItem::Multiple(idents)) - } else { - let ident = self.parse_identifier_with_alias()?; - Some(RenameSelectItem::Single(ident)) - } - } else { - None - }; - - Ok(opt_rename) - } - - /// Parse a [`Replace`](ReplaceSelectItem) information for wildcard select items. - pub fn parse_optional_select_item_replace( - &mut self, - ) -> Result, ParserError> { - let opt_replace = if self.parse_keyword(Keyword::REPLACE) { - if self.consume_token(&Token::LParen) { - let items = self.parse_comma_separated(|parser| { - Ok(Box::new(parser.parse_replace_elements()?)) - })?; - self.expect_token(&Token::RParen)?; - Some(ReplaceSelectItem { items }) - } else { - let tok = self.next_token(); - return self.expected("( after REPLACE but", tok); - } - } else { - None - }; - - Ok(opt_replace) - } - pub fn parse_replace_elements(&mut self) -> Result { - let expr = self.parse_expr()?; - let as_keyword = self.parse_keyword(Keyword::AS); - let ident = self.parse_identifier(false)?; - Ok(ReplaceSelectElement { - expr, - column_name: ident, - as_keyword, - }) - } - - /// Parse ASC or DESC, returns an Option with true if ASC, false of DESC or `None` if none of - /// them. 
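
The EXCLUDE branch above is the one Snowflake and DuckDB rely on; a minimal sketch with placeholder table and column names:

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // `* EXCLUDE (...)` lands in WildcardAdditionalOptions::opt_exclude.
        let sql = "SELECT * EXCLUDE (dept_id) FROM employees";
        assert!(Parser::parse_sql(&SnowflakeDialect {}, sql).is_ok());
    }
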
- pub fn parse_asc_desc(&mut self) -> Option { - if self.parse_keyword(Keyword::ASC) { - Some(true) - } else if self.parse_keyword(Keyword::DESC) { - Some(false) - } else { - None - } - } - - /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) - pub fn parse_order_by_expr(&mut self) -> Result { - let expr = self.parse_expr()?; - - let asc = self.parse_asc_desc(); - - let nulls_first = if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) { - Some(true) - } else if self.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) { - Some(false) - } else { - None - }; - - let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keywords(&[Keyword::WITH, Keyword::FILL]) - { - Some(self.parse_with_fill()?) - } else { - None - }; - - Ok(OrderByExpr { - expr, - asc, - nulls_first, - with_fill, - }) - } - - // Parse a WITH FILL clause (ClickHouse dialect) - // that follow the WITH FILL keywords in a ORDER BY clause - pub fn parse_with_fill(&mut self) -> Result { - let from = if self.parse_keyword(Keyword::FROM) { - Some(self.parse_expr()?) - } else { - None - }; - - let to = if self.parse_keyword(Keyword::TO) { - Some(self.parse_expr()?) - } else { - None - }; - - let step = if self.parse_keyword(Keyword::STEP) { - Some(self.parse_expr()?) - } else { - None - }; - - Ok(WithFill { from, to, step }) - } - - // Parse a set of comma seperated INTERPOLATE expressions (ClickHouse dialect) - // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier - pub fn parse_interpolations(&mut self) -> Result, ParserError> { - if !self.parse_keyword(Keyword::INTERPOLATE) { - return Ok(None); - } - - if self.consume_token(&Token::LParen) { - let interpolations = - self.parse_comma_separated0(|p| p.parse_interpolation(), Token::RParen)?; - self.expect_token(&Token::RParen)?; - // INTERPOLATE () and INTERPOLATE ( ... ) variants - return Ok(Some(Interpolate { - exprs: Some(interpolations), - })); - } - - // INTERPOLATE - Ok(Some(Interpolate { exprs: None })) - } - - // Parse a INTERPOLATE expression (ClickHouse dialect) - pub fn parse_interpolation(&mut self) -> Result { - let column = self.parse_identifier(false)?; - let expr = if self.parse_keyword(Keyword::AS) { - Some(self.parse_expr()?) - } else { - None - }; - Ok(InterpolateExpr { column, expr }) - } - - /// Parse a TOP clause, MSSQL equivalent of LIMIT, - /// that follows after `SELECT [DISTINCT]`. 
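
Since parse_with_fill is gated on ClickHouseDialect | GenericDialect, an ORDER BY with a WITH FILL modifier goes through parse_order_by_expr and parse_with_fill; a sketch with placeholder names:

    use sqlparser::dialect::ClickHouseDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // FROM / TO / STEP are each optional and parsed in that order.
        let sql = "SELECT n FROM t ORDER BY n WITH FILL FROM 1 TO 10 STEP 1";
        assert!(Parser::parse_sql(&ClickHouseDialect {}, sql).is_ok());
    }
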
- pub fn parse_top(&mut self) -> Result { - let quantity = if self.consume_token(&Token::LParen) { - let quantity = self.parse_expr()?; - self.expect_token(&Token::RParen)?; - Some(TopQuantity::Expr(quantity)) - } else { - let next_token = self.next_token(); - let quantity = match next_token.token { - Token::Number(s, _) => Self::parse::(s, next_token.span.start)?, - _ => self.expected("literal int", next_token)?, - }; - Some(TopQuantity::Constant(quantity)) - }; - - let percent = self.parse_keyword(Keyword::PERCENT); - - let with_ties = self.parse_keywords(&[Keyword::WITH, Keyword::TIES]); - - Ok(Top { - with_ties, - percent, - quantity, - }) - } - - /// Parse a LIMIT clause - pub fn parse_limit(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::ALL) { - Ok(None) - } else { - Ok(Some(self.parse_expr()?)) - } - } - - /// Parse an OFFSET clause - pub fn parse_offset(&mut self) -> Result { - let value = self.parse_expr()?; - let rows = if self.parse_keyword(Keyword::ROW) { - OffsetRows::Row - } else if self.parse_keyword(Keyword::ROWS) { - OffsetRows::Rows - } else { - OffsetRows::None - }; - Ok(Offset { value, rows }) - } - - /// Parse a FETCH clause - pub fn parse_fetch(&mut self) -> Result { - self.expect_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT])?; - let (quantity, percent) = if self - .parse_one_of_keywords(&[Keyword::ROW, Keyword::ROWS]) - .is_some() - { - (None, false) - } else { - let quantity = Expr::Value(self.parse_value()?); - let percent = self.parse_keyword(Keyword::PERCENT); - self.expect_one_of_keywords(&[Keyword::ROW, Keyword::ROWS])?; - (Some(quantity), percent) - }; - let with_ties = if self.parse_keyword(Keyword::ONLY) { - false - } else if self.parse_keywords(&[Keyword::WITH, Keyword::TIES]) { - true - } else { - return self.expected("one of ONLY or WITH TIES", self.peek_token()); - }; - Ok(Fetch { - with_ties, - percent, - quantity, - }) - } - - /// Parse a FOR UPDATE/FOR SHARE clause - pub fn parse_lock(&mut self) -> Result { - let lock_type = match self.expect_one_of_keywords(&[Keyword::UPDATE, Keyword::SHARE])? { - Keyword::UPDATE => LockType::Update, - Keyword::SHARE => LockType::Share, - _ => unreachable!(), - }; - let of = if self.parse_keyword(Keyword::OF) { - Some(self.parse_object_name(false)?) 
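
parse_top accepts either a parenthesized expression or a literal int, followed by optional PERCENT and WITH TIES, in that order; a sketch:

    use sqlparser::dialect::MsSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // Quantity, then PERCENT: the order parse_top expects.
        let sql = "SELECT TOP (10) PERCENT * FROM t";
        assert!(Parser::parse_sql(&MsSqlDialect {}, sql).is_ok());
    }
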
- } else { - None - }; - let nonblock = if self.parse_keyword(Keyword::NOWAIT) { - Some(NonBlock::Nowait) - } else if self.parse_keywords(&[Keyword::SKIP, Keyword::LOCKED]) { - Some(NonBlock::SkipLocked) - } else { - None - }; - Ok(LockClause { - lock_type, - of, - nonblock, - }) - } - - pub fn parse_values(&mut self, allow_empty: bool) -> Result { - let mut explicit_row = false; - - let rows = self.parse_comma_separated(|parser| { - if parser.parse_keyword(Keyword::ROW) { - explicit_row = true; - } - - parser.expect_token(&Token::LParen)?; - if allow_empty && parser.peek_token().token == Token::RParen { - parser.next_token(); - Ok(vec![]) - } else { - let exprs = parser.parse_comma_separated(Parser::parse_expr)?; - parser.expect_token(&Token::RParen)?; - Ok(exprs) - } - })?; - Ok(Values { explicit_row, rows }) - } - - pub fn parse_start_transaction(&mut self) -> Result { - self.expect_keyword(Keyword::TRANSACTION)?; - Ok(Statement::StartTransaction { - modes: self.parse_transaction_modes()?, - begin: false, - transaction: Some(BeginTransactionKind::Transaction), - modifier: None, - }) - } - - pub fn parse_begin(&mut self) -> Result { - let modifier = if !self.dialect.supports_start_transaction_modifier() { - None - } else if self.parse_keyword(Keyword::DEFERRED) { - Some(TransactionModifier::Deferred) - } else if self.parse_keyword(Keyword::IMMEDIATE) { - Some(TransactionModifier::Immediate) - } else if self.parse_keyword(Keyword::EXCLUSIVE) { - Some(TransactionModifier::Exclusive) - } else { - None - }; - let transaction = match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) { - Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction), - Some(Keyword::WORK) => Some(BeginTransactionKind::Work), - _ => None, - }; - Ok(Statement::StartTransaction { - modes: self.parse_transaction_modes()?, - begin: true, - transaction, - modifier, - }) - } - - pub fn parse_end(&mut self) -> Result { - Ok(Statement::Commit { - chain: self.parse_commit_rollback_chain()?, - }) - } - - pub fn parse_transaction_modes(&mut self) -> Result, ParserError> { - let mut modes = vec![]; - let mut required = false; - loop { - let mode = if self.parse_keywords(&[Keyword::ISOLATION, Keyword::LEVEL]) { - let iso_level = if self.parse_keywords(&[Keyword::READ, Keyword::UNCOMMITTED]) { - TransactionIsolationLevel::ReadUncommitted - } else if self.parse_keywords(&[Keyword::READ, Keyword::COMMITTED]) { - TransactionIsolationLevel::ReadCommitted - } else if self.parse_keywords(&[Keyword::REPEATABLE, Keyword::READ]) { - TransactionIsolationLevel::RepeatableRead - } else if self.parse_keyword(Keyword::SERIALIZABLE) { - TransactionIsolationLevel::Serializable - } else { - self.expected("isolation level", self.peek_token())? - }; - TransactionMode::IsolationLevel(iso_level) - } else if self.parse_keywords(&[Keyword::READ, Keyword::ONLY]) { - TransactionMode::AccessMode(TransactionAccessMode::ReadOnly) - } else if self.parse_keywords(&[Keyword::READ, Keyword::WRITE]) { - TransactionMode::AccessMode(TransactionAccessMode::ReadWrite) - } else if required { - self.expected("transaction mode", self.peek_token())? - } else { - break; - }; - modes.push(mode); - // ANSI requires a comma after each transaction mode, but - // PostgreSQL, for historical reasons, does not. We follow - // PostgreSQL in making the comma optional, since that is strictly - // more general. 
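
Because the comma between transaction modes is optional, both the ANSI-style and PostgreSQL-style spellings parse; a small sketch:

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let d = GenericDialect {};
        // Comma-separated and bare whitespace-separated modes both work.
        assert!(Parser::parse_sql(&d, "START TRANSACTION ISOLATION LEVEL SERIALIZABLE, READ ONLY").is_ok());
        assert!(Parser::parse_sql(&d, "START TRANSACTION ISOLATION LEVEL SERIALIZABLE READ ONLY").is_ok());
    }
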
- required = self.consume_token(&Token::Comma); - } - Ok(modes) - } - - pub fn parse_commit(&mut self) -> Result { - Ok(Statement::Commit { - chain: self.parse_commit_rollback_chain()?, - }) - } - - pub fn parse_rollback(&mut self) -> Result { - let chain = self.parse_commit_rollback_chain()?; - let savepoint = self.parse_rollback_savepoint()?; - - Ok(Statement::Rollback { chain, savepoint }) - } - - pub fn parse_commit_rollback_chain(&mut self) -> Result { - let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); - if self.parse_keyword(Keyword::AND) { - let chain = !self.parse_keyword(Keyword::NO); - self.expect_keyword(Keyword::CHAIN)?; - Ok(chain) - } else { - Ok(false) - } - } - - pub fn parse_rollback_savepoint(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::TO) { - let _ = self.parse_keyword(Keyword::SAVEPOINT); - let savepoint = self.parse_identifier(false)?; - - Ok(Some(savepoint)) - } else { - Ok(None) - } - } - - pub fn parse_deallocate(&mut self) -> Result { - let prepare = self.parse_keyword(Keyword::PREPARE); - let name = self.parse_identifier(false)?; - Ok(Statement::Deallocate { name, prepare }) - } - - pub fn parse_execute(&mut self) -> Result { - let name = self.parse_object_name(false)?; - - let has_parentheses = self.consume_token(&Token::LParen); - - let end_token = match (has_parentheses, self.peek_token().token) { - (true, _) => Token::RParen, - (false, Token::EOF) => Token::EOF, - (false, Token::Word(w)) if w.keyword == Keyword::USING => Token::Word(w), - (false, _) => Token::SemiColon, - }; - - let parameters = self.parse_comma_separated0(Parser::parse_expr, end_token)?; - - if has_parentheses { - self.expect_token(&Token::RParen)?; - } - - let mut using = vec![]; - if self.parse_keyword(Keyword::USING) { - using.push(self.parse_expr()?); - - while self.consume_token(&Token::Comma) { - using.push(self.parse_expr()?); - } - }; - - Ok(Statement::Execute { - name, - parameters, - has_parentheses, - using, - }) - } - - pub fn parse_prepare(&mut self) -> Result { - let name = self.parse_identifier(false)?; - - let mut data_types = vec![]; - if self.consume_token(&Token::LParen) { - data_types = self.parse_comma_separated(Parser::parse_data_type)?; - self.expect_token(&Token::RParen)?; - } - - self.expect_keyword(Keyword::AS)?; - let statement = Box::new(self.parse_statement()?); - Ok(Statement::Prepare { - name, - data_types, - statement, - }) - } - - pub fn parse_unload(&mut self) -> Result { - self.expect_token(&Token::LParen)?; - let query = self.parse_query()?; - self.expect_token(&Token::RParen)?; - - self.expect_keyword(Keyword::TO)?; - let to = self.parse_identifier(false)?; - - let with_options = self.parse_options(Keyword::WITH)?; - - Ok(Statement::Unload { - query, - to, - with: with_options, - }) - } - - pub fn parse_merge_clauses(&mut self) -> Result, ParserError> { - let mut clauses = vec![]; - loop { - if self.peek_token() == Token::EOF || self.peek_token() == Token::SemiColon { - break; - } - self.expect_keyword(Keyword::WHEN)?; - - let mut clause_kind = MergeClauseKind::Matched; - if self.parse_keyword(Keyword::NOT) { - clause_kind = MergeClauseKind::NotMatched; - } - self.expect_keyword(Keyword::MATCHED)?; - - if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) - { - clause_kind = MergeClauseKind::NotMatchedBySource; - } else if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) - { - clause_kind 
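
A sketch of a MERGE statement that takes both the MATCHED/UPDATE and NOT MATCHED/INSERT paths handled above (table names are placeholders):

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "MERGE INTO t USING s ON t.id = s.id \
                   WHEN MATCHED THEN UPDATE SET t.v = s.v \
                   WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)";
        assert!(Parser::parse_sql(&GenericDialect {}, sql).is_ok());
    }
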
= MergeClauseKind::NotMatchedByTarget; - } - - let predicate = if self.parse_keyword(Keyword::AND) { - Some(self.parse_expr()?) - } else { - None - }; - - self.expect_keyword(Keyword::THEN)?; - - let merge_clause = match self.parse_one_of_keywords(&[ - Keyword::UPDATE, - Keyword::INSERT, - Keyword::DELETE, - ]) { - Some(Keyword::UPDATE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return Err(ParserError::ParserError(format!( - "UPDATE is not allowed in a {clause_kind} merge clause" - ))); - } - self.expect_keyword(Keyword::SET)?; - MergeAction::Update { - assignments: self.parse_comma_separated(Parser::parse_assignment)?, - } - } - Some(Keyword::DELETE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return Err(ParserError::ParserError(format!( - "DELETE is not allowed in a {clause_kind} merge clause" - ))); - } - MergeAction::Delete - } - Some(Keyword::INSERT) => { - if !matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return Err(ParserError::ParserError(format!( - "INSERT is not allowed in a {clause_kind} merge clause" - ))); - } - let is_mysql = dialect_of!(self is MySqlDialect); - - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - let kind = if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.parse_keyword(Keyword::ROW) - { - MergeInsertKind::Row - } else { - self.expect_keyword(Keyword::VALUES)?; - let values = self.parse_values(is_mysql)?; - MergeInsertKind::Values(values) - }; - MergeAction::Insert(MergeInsertExpr { columns, kind }) - } - _ => { - return Err(ParserError::ParserError( - "expected UPDATE, DELETE or INSERT in merge clause".to_string(), - )); - } - }; - clauses.push(MergeClause { - clause_kind, - predicate, - action: merge_clause, - }); - } - Ok(clauses) - } - - pub fn parse_merge(&mut self) -> Result { - let into = self.parse_keyword(Keyword::INTO); - - let table = self.parse_table_factor()?; - - self.expect_keyword(Keyword::USING)?; - let source = self.parse_table_factor()?; - self.expect_keyword(Keyword::ON)?; - let on = self.parse_expr()?; - let clauses = self.parse_merge_clauses()?; - - Ok(Statement::Merge { - into, - table, - source, - on: Box::new(on), - clauses, - }) - } - - fn parse_pragma_value(&mut self) -> Result { - match self.parse_value()? { - v @ Value::SingleQuotedString(_) => Ok(v), - v @ Value::DoubleQuotedString(_) => Ok(v), - v @ Value::Number(_, _) => Ok(v), - v @ Value::Placeholder(_) => Ok(v), - _ => { - self.prev_token(); - self.expected("number or string or ? 
placeholder", self.peek_token()) - } - } - } - - // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] - pub fn parse_pragma(&mut self) -> Result { - let name = self.parse_object_name(false)?; - if self.consume_token(&Token::LParen) { - let value = self.parse_pragma_value()?; - self.expect_token(&Token::RParen)?; - Ok(Statement::Pragma { - name, - value: Some(value), - is_eq: false, - }) - } else if self.consume_token(&Token::Eq) { - Ok(Statement::Pragma { - name, - value: Some(self.parse_pragma_value()?), - is_eq: true, - }) - } else { - Ok(Statement::Pragma { - name, - value: None, - is_eq: false, - }) - } - } - - /// `INSTALL [extension_name]` - pub fn parse_install(&mut self) -> Result { - let extension_name = self.parse_identifier(false)?; - - Ok(Statement::Install { extension_name }) - } - - /// Parse a SQL LOAD statement - pub fn parse_load(&mut self) -> Result { - if self.dialect.supports_load_extension() { - let extension_name = self.parse_identifier(false)?; - Ok(Statement::Load { extension_name }) - } else if self.parse_keyword(Keyword::DATA) && self.dialect.supports_load_data() { - let local = self.parse_one_of_keywords(&[Keyword::LOCAL]).is_some(); - self.expect_keyword(Keyword::INPATH)?; - let inpath = self.parse_literal_string()?; - let overwrite = self.parse_one_of_keywords(&[Keyword::OVERWRITE]).is_some(); - self.expect_keyword(Keyword::INTO)?; - self.expect_keyword(Keyword::TABLE)?; - let table_name = self.parse_object_name(false)?; - let partitioned = self.parse_insert_partition()?; - let table_format = self.parse_load_data_table_format()?; - Ok(Statement::LoadData { - local, - inpath, - overwrite, - table_name, - partitioned, - table_format, - }) - } else { - self.expected( - "`DATA` or an extension name after `LOAD`", - self.peek_token(), - ) - } - } - - /// ```sql - /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] - /// ``` - /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) - pub fn parse_optimize_table(&mut self) -> Result { - self.expect_keyword(Keyword::TABLE)?; - let name = self.parse_object_name(false)?; - let on_cluster = self.parse_optional_on_cluster()?; - - let partition = if self.parse_keyword(Keyword::PARTITION) { - if self.parse_keyword(Keyword::ID) { - Some(Partition::Identifier(self.parse_identifier(false)?)) - } else { - Some(Partition::Expr(self.parse_expr()?)) - } - } else { - None - }; - - let include_final = self.parse_keyword(Keyword::FINAL); - let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) { - if self.parse_keyword(Keyword::BY) { - Some(Deduplicate::ByExpression(self.parse_expr()?)) - } else { - Some(Deduplicate::All) - } - } else { - None - }; - - Ok(Statement::OptimizeTable { - name, - on_cluster, - partition, - include_final, - deduplicate, - }) - } - - /// ```sql - /// CREATE [ { TEMPORARY | TEMP } ] SEQUENCE [ IF NOT EXISTS ] - /// ``` - /// - /// See [Postgres docs](https://www.postgresql.org/docs/current/sql-createsequence.html) for more details. - pub fn parse_create_sequence(&mut self, temporary: bool) -> Result { - //[ IF NOT EXISTS ] - let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); - //name - let name = self.parse_object_name(false)?; - //[ AS data_type ] - let mut data_type: Option = None; - if self.parse_keywords(&[Keyword::AS]) { - data_type = Some(self.parse_data_type()?) 
- } - let sequence_options = self.parse_create_sequence_options()?; - // [ OWNED BY { table_name.column_name | NONE } ] - let owned_by = if self.parse_keywords(&[Keyword::OWNED, Keyword::BY]) { - if self.parse_keywords(&[Keyword::NONE]) { - Some(ObjectName(vec![Ident::new("NONE")])) - } else { - Some(self.parse_object_name(false)?) - } - } else { - None - }; - Ok(Statement::CreateSequence { - temporary, - if_not_exists, - name, - data_type, - sequence_options, - owned_by, - }) - } - - fn parse_create_sequence_options(&mut self) -> Result, ParserError> { - let mut sequence_options = vec![]; - //[ INCREMENT [ BY ] increment ] - if self.parse_keywords(&[Keyword::INCREMENT]) { - if self.parse_keywords(&[Keyword::BY]) { - sequence_options.push(SequenceOptions::IncrementBy(self.parse_number()?, true)); - } else { - sequence_options.push(SequenceOptions::IncrementBy(self.parse_number()?, false)); - } - } - //[ MINVALUE minvalue | NO MINVALUE ] - if self.parse_keyword(Keyword::MINVALUE) { - sequence_options.push(SequenceOptions::MinValue(Some(self.parse_number()?))); - } else if self.parse_keywords(&[Keyword::NO, Keyword::MINVALUE]) { - sequence_options.push(SequenceOptions::MinValue(None)); - } - //[ MAXVALUE maxvalue | NO MAXVALUE ] - if self.parse_keywords(&[Keyword::MAXVALUE]) { - sequence_options.push(SequenceOptions::MaxValue(Some(self.parse_number()?))); - } else if self.parse_keywords(&[Keyword::NO, Keyword::MAXVALUE]) { - sequence_options.push(SequenceOptions::MaxValue(None)); - } - - //[ START [ WITH ] start ] - if self.parse_keywords(&[Keyword::START]) { - if self.parse_keywords(&[Keyword::WITH]) { - sequence_options.push(SequenceOptions::StartWith(self.parse_number()?, true)); - } else { - sequence_options.push(SequenceOptions::StartWith(self.parse_number()?, false)); - } - } - //[ CACHE cache ] - if self.parse_keywords(&[Keyword::CACHE]) { - sequence_options.push(SequenceOptions::Cache(self.parse_number()?)); - } - // [ [ NO ] CYCLE ] - if self.parse_keywords(&[Keyword::NO, Keyword::CYCLE]) { - sequence_options.push(SequenceOptions::Cycle(true)); - } else if self.parse_keywords(&[Keyword::CYCLE]) { - sequence_options.push(SequenceOptions::Cycle(false)); - } - - Ok(sequence_options) - } - - /// The index of the first unprocessed token. - pub fn index(&self) -> usize { - self.index - } - - pub fn parse_named_window(&mut self) -> Result { - let ident = self.parse_identifier(false)?; - self.expect_keyword(Keyword::AS)?; - - let window_expr = if self.consume_token(&Token::LParen) { - NamedWindowExpr::WindowSpec(self.parse_window_spec()?) - } else if self.dialect.supports_window_clause_named_window_reference() { - NamedWindowExpr::NamedWindow(self.parse_identifier(false)?) - } else { - return self.expected("(", self.peek_token()); - }; - - Ok(NamedWindowDefinition(ident, window_expr)) - } - - pub fn parse_create_procedure(&mut self, or_alter: bool) -> Result { - let name = self.parse_object_name(false)?; - let params = self.parse_optional_procedure_parameters()?; - self.expect_keyword(Keyword::AS)?; - self.expect_keyword(Keyword::BEGIN)?; - let statements = self.parse_statements()?; - self.expect_keyword(Keyword::END)?; - Ok(Statement::CreateProcedure { - name, - or_alter, - params, - body: statements, - }) - } - - pub fn parse_window_spec(&mut self) -> Result { - let window_name = match self.peek_token().token { - Token::Word(word) if word.keyword == Keyword::NoKeyword => { - self.parse_optional_indent()? 
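
parse_create_sequence_options checks the options in one fixed pass (INCREMENT, MINVALUE, MAXVALUE, START, CACHE, CYCLE), so a statement written in that order parses cleanly; a sketch:

    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // Options listed in the same order the parser checks them.
        let sql = "CREATE SEQUENCE seq INCREMENT BY 2 MINVALUE 1 MAXVALUE 100 START WITH 1 CYCLE";
        assert!(Parser::parse_sql(&GenericDialect {}, sql).is_ok());
    }
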
- } - _ => None, - }; - - let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_expr)? - } else { - vec![] - }; - let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { - self.parse_comma_separated(Parser::parse_order_by_expr)? - } else { - vec![] - }; - - let window_frame = if !self.consume_token(&Token::RParen) { - let window_frame = self.parse_window_frame()?; - self.expect_token(&Token::RParen)?; - Some(window_frame) - } else { - None - }; - Ok(WindowSpec { - window_name, - partition_by, - order_by, - window_frame, - }) - } - - pub fn parse_create_type(&mut self) -> Result { - let name = self.parse_object_name(false)?; - self.expect_keyword(Keyword::AS)?; - - let mut attributes = vec![]; - if !self.consume_token(&Token::LParen) || self.consume_token(&Token::RParen) { - return Ok(Statement::CreateType { - name, - representation: UserDefinedTypeRepresentation::Composite { attributes }, - }); - } - - loop { - let attr_name = self.parse_identifier(false)?; - let attr_data_type = self.parse_data_type()?; - let attr_collation = if self.parse_keyword(Keyword::COLLATE) { - Some(self.parse_object_name(false)?) - } else { - None - }; - attributes.push(UserDefinedTypeCompositeAttributeDef { - name: attr_name, - data_type: attr_data_type, - collation: attr_collation, - }); - let comma = self.consume_token(&Token::Comma); - if self.consume_token(&Token::RParen) { - // allow a trailing comma - break; - } else if !comma { - return self.expected("',' or ')' after attribute definition", self.peek_token()); - } - } - - Ok(Statement::CreateType { - name, - representation: UserDefinedTypeRepresentation::Composite { attributes }, - }) - } - - fn parse_parenthesized_identifiers(&mut self) -> Result, ParserError> { - self.expect_token(&Token::LParen)?; - let partitions = self.parse_comma_separated(|p| p.parse_identifier(false))?; - self.expect_token(&Token::RParen)?; - Ok(partitions) - } - - fn parse_column_position(&mut self) -> Result, ParserError> { - if dialect_of!(self is MySqlDialect | GenericDialect) { - if self.parse_keyword(Keyword::FIRST) { - Ok(Some(MySQLColumnPosition::First)) - } else if self.parse_keyword(Keyword::AFTER) { - let ident = self.parse_identifier(false)?; - Ok(Some(MySQLColumnPosition::After(ident))) - } else { - Ok(None) - } - } else { - Ok(None) - } - } - - /// Consume the parser and return its underlying token buffer - pub fn into_tokens(self) -> Vec { - self.tokens - } - - /// Returns true if the next keyword indicates a sub query, i.e. SELECT or WITH - fn peek_sub_query(&mut self) -> bool { - if self - .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) - .is_some() - { - self.prev_token(); - return true; + self.prev_token(); + return true; } false } - - fn parse_show_stmt_options(&mut self) -> Result { - let show_in; - let mut filter_position = None; - if self.dialect.supports_show_like_before_in() { - if let Some(filter) = self.parse_show_statement_filter()? { - filter_position = Some(ShowStatementFilterPosition::Infix(filter)); - } - show_in = self.maybe_parse_show_stmt_in()?; - } else { - show_in = self.maybe_parse_show_stmt_in()?; - if let Some(filter) = self.parse_show_statement_filter()? 
{ - filter_position = Some(ShowStatementFilterPosition::Suffix(filter)); - } - } - let starts_with = self.maybe_parse_show_stmt_starts_with()?; - let limit = self.maybe_parse_show_stmt_limit()?; - let from = self.maybe_parse_show_stmt_from()?; - Ok(ShowStatementOptions { - filter_position, - show_in, - starts_with, - limit, - limit_from: from, - }) - } - - fn maybe_parse_show_stmt_in(&mut self) -> Result, ParserError> { - let clause = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) { - Some(Keyword::FROM) => ShowStatementInClause::FROM, - Some(Keyword::IN) => ShowStatementInClause::IN, - None => return Ok(None), - _ => return self.expected("FROM or IN", self.peek_token()), - }; - - let (parent_type, parent_name) = match self.parse_one_of_keywords(&[ - Keyword::ACCOUNT, - Keyword::DATABASE, - Keyword::SCHEMA, - Keyword::TABLE, - Keyword::VIEW, - ]) { - // If we see these next keywords it means we don't have a parent name - Some(Keyword::DATABASE) - if self.peek_keywords(&[Keyword::STARTS, Keyword::WITH]) - | self.peek_keyword(Keyword::LIMIT) => - { - (Some(ShowStatementInParentType::Database), None) - } - Some(Keyword::SCHEMA) - if self.peek_keywords(&[Keyword::STARTS, Keyword::WITH]) - | self.peek_keyword(Keyword::LIMIT) => - { - (Some(ShowStatementInParentType::Schema), None) - } - Some(parent_kw) => { - // The parent name here is still optional, for example: - // SHOW TABLES IN ACCOUNT, so parsing the object name - // may fail because the statement ends. - let parent_name = self.maybe_parse(|p| p.parse_object_name(false))?; - match parent_kw { - Keyword::ACCOUNT => (Some(ShowStatementInParentType::Account), parent_name), - Keyword::DATABASE => (Some(ShowStatementInParentType::Database), parent_name), - Keyword::SCHEMA => (Some(ShowStatementInParentType::Schema), parent_name), - Keyword::TABLE => (Some(ShowStatementInParentType::Table), parent_name), - Keyword::VIEW => (Some(ShowStatementInParentType::View), parent_name), - _ => { - return self.expected( - "one of ACCOUNT, DATABASE, SCHEMA, TABLE or VIEW", - self.peek_token(), - ) - } - } - } - None => { - // Parsing MySQL style FROM tbl_name FROM db_name - // which is equivalent to FROM tbl_name.db_name - let mut parent_name = self.parse_object_name(false)?; - if self - .parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) - .is_some() - { - parent_name.0.insert(0, self.parse_identifier(false)?); - } - (None, Some(parent_name)) - } - }; - - Ok(Some(ShowStatementIn { - clause, - parent_type, - parent_name, - })) - } - - fn maybe_parse_show_stmt_starts_with(&mut self) -> Result, ParserError> { - if self.parse_keywords(&[Keyword::STARTS, Keyword::WITH]) { - Ok(Some(self.parse_value()?)) - } else { - Ok(None) - } - } - - fn maybe_parse_show_stmt_limit(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::LIMIT) { - Ok(self.parse_limit()?) 
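
maybe_parse_show_stmt_in also covers the MySQL-style `FROM db` form; a sketch with a placeholder database name:

    use sqlparser::dialect::MySqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let d = MySqlDialect {};
        // FROM and IN both map to ShowStatementInClause; the LIKE filter is
        // parsed as a suffix for dialects without show-like-before-in support.
        assert!(Parser::parse_sql(&d, "SHOW TABLES FROM db1").is_ok());
        assert!(Parser::parse_sql(&d, "SHOW TABLES IN db1 LIKE 'a%'").is_ok());
    }
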
- } else { - Ok(None) - } - } - - fn maybe_parse_show_stmt_from(&mut self) -> Result, ParserError> { - if self.parse_keyword(Keyword::FROM) { - Ok(Some(self.parse_value()?)) - } else { - Ok(None) - } - } -} - -impl Word { - pub fn to_ident(&self, span: Span) -> Ident { - Ident { - value: self.value.clone(), - quote_style: self.quote_style, - span, - } - } -} - -#[cfg(test)] -mod tests { - use crate::test_utils::{all_dialects, TestedDialects}; - - use super::*; - - #[test] - fn test_prev_index() { - let sql = "SELECT version"; - all_dialects().run_parser_method(sql, |parser| { - assert_eq!(parser.peek_token(), Token::make_keyword("SELECT")); - assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); - parser.prev_token(); - assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); - assert_eq!(parser.next_token(), Token::make_word("version", None)); - parser.prev_token(); - assert_eq!(parser.peek_token(), Token::make_word("version", None)); - assert_eq!(parser.next_token(), Token::make_word("version", None)); - assert_eq!(parser.peek_token(), Token::EOF); - parser.prev_token(); - assert_eq!(parser.next_token(), Token::make_word("version", None)); - assert_eq!(parser.next_token(), Token::EOF); - assert_eq!(parser.next_token(), Token::EOF); - parser.prev_token(); - }); - } - - #[test] - fn test_peek_tokens() { - all_dialects().run_parser_method("SELECT foo AS bar FROM baz", |parser| { - assert!(matches!( - parser.peek_tokens(), - [Token::Word(Word { - keyword: Keyword::SELECT, - .. - })] - )); - - assert!(matches!( - parser.peek_tokens(), - [ - Token::Word(Word { - keyword: Keyword::SELECT, - .. - }), - Token::Word(_), - Token::Word(Word { - keyword: Keyword::AS, - .. - }), - ] - )); - - for _ in 0..4 { - parser.next_token(); - } - - assert!(matches!( - parser.peek_tokens(), - [ - Token::Word(Word { - keyword: Keyword::FROM, - .. - }), - Token::Word(_), - Token::EOF, - Token::EOF, - ] - )) - }) - } - - #[cfg(test)] - mod test_parse_data_type { - use crate::ast::{ - CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, ObjectName, TimezoneInfo, - }; - use crate::dialect::{AnsiDialect, GenericDialect}; - use crate::test_utils::TestedDialects; - - macro_rules! test_parse_data_type { - ($dialect:expr, $input:expr, $expected_type:expr $(,)?) 
=> {{ - $dialect.run_parser_method(&*$input, |parser| { - let data_type = parser.parse_data_type().unwrap(); - assert_eq!($expected_type, data_type); - assert_eq!($input.to_string(), data_type.to_string()); - }); - }}; - } - - #[test] - fn test_ansii_character_string_types() { - // Character string types: - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None)); - - test_parse_data_type!( - dialect, - "CHARACTER(20)", - DataType::Character(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER(20 CHARACTERS)", - DataType::Character(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Characters) - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER(20 OCTETS)", - DataType::Character(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Octets) - })) - ); - - test_parse_data_type!(dialect, "CHAR", DataType::Char(None)); - - test_parse_data_type!( - dialect, - "CHAR(20)", - DataType::Char(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - - test_parse_data_type!( - dialect, - "CHAR(20 CHARACTERS)", - DataType::Char(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Characters) - })) - ); - - test_parse_data_type!( - dialect, - "CHAR(20 OCTETS)", - DataType::Char(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Octets) - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER VARYING(20)", - DataType::CharacterVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER VARYING(20 CHARACTERS)", - DataType::CharacterVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Characters) - })) - ); - - test_parse_data_type!( - dialect, - "CHARACTER VARYING(20 OCTETS)", - DataType::CharacterVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Octets) - })) - ); - - test_parse_data_type!( - dialect, - "CHAR VARYING(20)", - DataType::CharVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - - test_parse_data_type!( - dialect, - "CHAR VARYING(20 CHARACTERS)", - DataType::CharVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Characters) - })) - ); - - test_parse_data_type!( - dialect, - "CHAR VARYING(20 OCTETS)", - DataType::CharVarying(Some(CharacterLength::IntegerLength { - length: 20, - unit: Some(CharLengthUnits::Octets) - })) - ); - - test_parse_data_type!( - dialect, - "VARCHAR(20)", - DataType::Varchar(Some(CharacterLength::IntegerLength { - length: 20, - unit: None - })) - ); - } - - #[test] - fn test_ansii_character_large_object_types() { - // Character large object types: - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!( - dialect, - "CHARACTER LARGE OBJECT", - DataType::CharacterLargeObject(None) - ); - test_parse_data_type!( - dialect, - "CHARACTER LARGE OBJECT(20)", - DataType::CharacterLargeObject(Some(20)) - ); - - test_parse_data_type!( - dialect, - "CHAR LARGE OBJECT", - DataType::CharLargeObject(None) - ); - test_parse_data_type!( - dialect, - "CHAR LARGE OBJECT(20)", - DataType::CharLargeObject(Some(20)) - ); - - test_parse_data_type!(dialect, "CLOB", 
DataType::Clob(None)); - test_parse_data_type!(dialect, "CLOB(20)", DataType::Clob(Some(20))); - } - - #[test] - fn test_parse_custom_types() { - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!( - dialect, - "GEOMETRY", - DataType::Custom(ObjectName(vec!["GEOMETRY".into()]), vec![]) - ); - - test_parse_data_type!( - dialect, - "GEOMETRY(POINT)", - DataType::Custom( - ObjectName(vec!["GEOMETRY".into()]), - vec!["POINT".to_string()] - ) - ); - - test_parse_data_type!( - dialect, - "GEOMETRY(POINT, 4326)", - DataType::Custom( - ObjectName(vec!["GEOMETRY".into()]), - vec!["POINT".to_string(), "4326".to_string()] - ) - ); - } - - #[test] - fn test_ansii_exact_numeric_types() { - // Exact numeric types: - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!(dialect, "NUMERIC", DataType::Numeric(ExactNumberInfo::None)); - - test_parse_data_type!( - dialect, - "NUMERIC(2)", - DataType::Numeric(ExactNumberInfo::Precision(2)) - ); - - test_parse_data_type!( - dialect, - "NUMERIC(2,10)", - DataType::Numeric(ExactNumberInfo::PrecisionAndScale(2, 10)) - ); - - test_parse_data_type!(dialect, "DECIMAL", DataType::Decimal(ExactNumberInfo::None)); - - test_parse_data_type!( - dialect, - "DECIMAL(2)", - DataType::Decimal(ExactNumberInfo::Precision(2)) - ); - - test_parse_data_type!( - dialect, - "DECIMAL(2,10)", - DataType::Decimal(ExactNumberInfo::PrecisionAndScale(2, 10)) - ); - - test_parse_data_type!(dialect, "DEC", DataType::Dec(ExactNumberInfo::None)); - - test_parse_data_type!( - dialect, - "DEC(2)", - DataType::Dec(ExactNumberInfo::Precision(2)) - ); - - test_parse_data_type!( - dialect, - "DEC(2,10)", - DataType::Dec(ExactNumberInfo::PrecisionAndScale(2, 10)) - ); - } - - #[test] - fn test_ansii_date_type() { - // Datetime types: - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); - - test_parse_data_type!(dialect, "DATE", DataType::Date); - - test_parse_data_type!(dialect, "TIME", DataType::Time(None, TimezoneInfo::None)); - - test_parse_data_type!( - dialect, - "TIME(6)", - DataType::Time(Some(6), TimezoneInfo::None) - ); - - test_parse_data_type!( - dialect, - "TIME WITH TIME ZONE", - DataType::Time(None, TimezoneInfo::WithTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIME(6) WITH TIME ZONE", - DataType::Time(Some(6), TimezoneInfo::WithTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIME WITHOUT TIME ZONE", - DataType::Time(None, TimezoneInfo::WithoutTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIME(6) WITHOUT TIME ZONE", - DataType::Time(Some(6), TimezoneInfo::WithoutTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIMESTAMP", - DataType::Timestamp(None, TimezoneInfo::None) - ); - - test_parse_data_type!( - dialect, - "TIMESTAMP(22)", - DataType::Timestamp(Some(22), TimezoneInfo::None) - ); - - test_parse_data_type!( - dialect, - "TIMESTAMP(22) WITH TIME ZONE", - DataType::Timestamp(Some(22), TimezoneInfo::WithTimeZone) - ); - - test_parse_data_type!( - dialect, - "TIMESTAMP(33) WITHOUT TIME ZONE", - DataType::Timestamp(Some(33), TimezoneInfo::WithoutTimeZone) - ); - } - } - - #[test] - fn test_parse_schema_name() { - // The expected name should be identical as the input name, that's why I don't receive both - macro_rules! test_parse_schema_name { - ($input:expr, $expected_name:expr $(,)?) 
=> {{ - all_dialects().run_parser_method(&*$input, |parser| { - let schema_name = parser.parse_schema_name().unwrap(); - // Validate that the structure is the same as expected - assert_eq!(schema_name, $expected_name); - // Validate that the input and the expected structure serialization are the same - assert_eq!(schema_name.to_string(), $input.to_string()); - }); - }}; - } - - let dummy_name = ObjectName(vec![Ident::new("dummy_name")]); - let dummy_authorization = Ident::new("dummy_authorization"); - - test_parse_schema_name!( - format!("{dummy_name}"), - SchemaName::Simple(dummy_name.clone()) - ); - - test_parse_schema_name!( - format!("AUTHORIZATION {dummy_authorization}"), - SchemaName::UnnamedAuthorization(dummy_authorization.clone()), - ); - test_parse_schema_name!( - format!("{dummy_name} AUTHORIZATION {dummy_authorization}"), - SchemaName::NamedAuthorization(dummy_name.clone(), dummy_authorization.clone()), - ); - } - - #[test] - fn mysql_parse_index_table_constraint() { - macro_rules! test_parse_table_constraint { - ($dialect:expr, $input:expr, $expected:expr $(,)?) => {{ - $dialect.run_parser_method(&*$input, |parser| { - let constraint = parser.parse_optional_table_constraint().unwrap().unwrap(); - // Validate that the structure is the same as expected - assert_eq!(constraint, $expected); - // Validate that the input and the expected structure serialization are the same - assert_eq!(constraint.to_string(), $input.to_string()); - }); - }}; - } - - let dialect = - TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(MySqlDialect {})]); - - test_parse_table_constraint!( - dialect, - "INDEX (c1)", - TableConstraint::Index { - display_as_key: false, - name: None, - index_type: None, - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "KEY (c1)", - TableConstraint::Index { - display_as_key: true, - name: None, - index_type: None, - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX 'index' (c1, c2)", - TableConstraint::Index { - display_as_key: false, - name: Some(Ident::with_quote('\'', "index")), - index_type: None, - columns: vec![Ident::new("c1"), Ident::new("c2")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX USING BTREE (c1)", - TableConstraint::Index { - display_as_key: false, - name: None, - index_type: Some(IndexType::BTree), - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX USING HASH (c1)", - TableConstraint::Index { - display_as_key: false, - name: None, - index_type: Some(IndexType::Hash), - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX idx_name USING BTREE (c1)", - TableConstraint::Index { - display_as_key: false, - name: Some(Ident::new("idx_name")), - index_type: Some(IndexType::BTree), - columns: vec![Ident::new("c1")], - } - ); - - test_parse_table_constraint!( - dialect, - "INDEX idx_name USING HASH (c1)", - TableConstraint::Index { - display_as_key: false, - name: Some(Ident::new("idx_name")), - index_type: Some(IndexType::Hash), - columns: vec![Ident::new("c1")], - } - ); - } - - #[test] - fn test_tokenizer_error_loc() { - let sql = "foo '"; - let ast = Parser::parse_sql(&GenericDialect, sql); - assert_eq!( - ast, - Err(ParserError::TokenizerError( - "Unterminated string literal at Line: 1, Column: 5".to_string() - )) - ); - } - - #[test] - fn test_parser_error_loc() { - let sql = "SELECT this is a syntax error"; - let ast = 
Parser::parse_sql(&GenericDialect, sql); - assert_eq!( - ast, - Err(ParserError::ParserError( - "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16" - .to_string() - )) - ); - } - - #[test] - fn test_nested_explain_error() { - let sql = "EXPLAIN EXPLAIN SELECT 1"; - let ast = Parser::parse_sql(&GenericDialect, sql); - assert_eq!( - ast, - Err(ParserError::ParserError( - "Explain must be root of the plan".to_string() - )) - ); - } - - #[test] - fn test_parse_multipart_identifier_positive() { - let dialect = TestedDialects::new(vec![Box::new(GenericDialect {})]); - - // parse multipart with quotes - let expected = vec![ - Ident { - value: "CATALOG".to_string(), - quote_style: None, - span: Span::empty(), - }, - Ident { - value: "F(o)o. \"bar".to_string(), - quote_style: Some('"'), - span: Span::empty(), - }, - Ident { - value: "table".to_string(), - quote_style: None, - span: Span::empty(), - }, - ]; - dialect.run_parser_method(r#"CATALOG."F(o)o. ""bar".table"#, |parser| { - let actual = parser.parse_multipart_identifier().unwrap(); - assert_eq!(expected, actual); - }); - - // allow whitespace between ident parts - let expected = vec![ - Ident { - value: "CATALOG".to_string(), - quote_style: None, - span: Span::empty(), - }, - Ident { - value: "table".to_string(), - quote_style: None, - span: Span::empty(), - }, - ]; - dialect.run_parser_method("CATALOG . table", |parser| { - let actual = parser.parse_multipart_identifier().unwrap(); - assert_eq!(expected, actual); - }); - } - - #[test] - fn test_parse_multipart_identifier_negative() { - macro_rules! test_parse_multipart_identifier_error { - ($input:expr, $expected_err:expr $(,)?) => {{ - all_dialects().run_parser_method(&*$input, |parser| { - let actual_err = parser.parse_multipart_identifier().unwrap_err(); - assert_eq!(actual_err.to_string(), $expected_err); - }); - }}; - } - - test_parse_multipart_identifier_error!( - "", - "sql parser error: Empty input when parsing identifier", - ); - - test_parse_multipart_identifier_error!( - "*schema.table", - "sql parser error: Unexpected token in identifier: *", - ); - - test_parse_multipart_identifier_error!( - "schema.table*", - "sql parser error: Unexpected token in identifier: *", - ); - - test_parse_multipart_identifier_error!( - "schema.table.", - "sql parser error: Trailing period in identifier", - ); - - test_parse_multipart_identifier_error!( - "schema.*", - "sql parser error: Unexpected token following period in identifier: *", - ); - } - - #[test] - fn test_mysql_partition_selection() { - let sql = "SELECT * FROM employees PARTITION (p0, p2)"; - let expected = vec!["p0", "p2"]; - - let ast: Vec = Parser::parse_sql(&MySqlDialect {}, sql).unwrap(); - assert_eq!(ast.len(), 1); - if let Statement::Query(v) = &ast[0] { - if let SetExpr::Select(select) = &*v.body { - assert_eq!(select.from.len(), 1); - let from: &TableWithJoins = &select.from[0]; - let table_factor = &from.relation; - if let TableFactor::Table { partitions, .. 
} = table_factor { - let actual: Vec<&str> = partitions - .iter() - .map(|ident| ident.value.as_str()) - .collect(); - assert_eq!(expected, actual); - } - } - } else { - panic!("fail to parse mysql partition selection"); - } - } - - #[test] - fn test_replace_into_placeholders() { - let sql = "REPLACE INTO t (a) VALUES (&a)"; - - assert!(Parser::parse_sql(&GenericDialect {}, sql).is_err()); - } - - #[test] - fn test_replace_into_set() { - // NOTE: This is actually valid MySQL syntax, REPLACE and INSERT, - // but the parser does not yet support it. - // https://dev.mysql.com/doc/refman/8.3/en/insert.html - let sql = "REPLACE INTO t SET a='1'"; - - assert!(Parser::parse_sql(&MySqlDialect {}, sql).is_err()); - } - - #[test] - fn test_replace_into_set_placeholder() { - let sql = "REPLACE INTO t SET ?"; - - assert!(Parser::parse_sql(&GenericDialect {}, sql).is_err()); - } - - #[test] - fn test_replace_incomplete() { - let sql = r#"REPLACE"#; - - assert!(Parser::parse_sql(&MySqlDialect {}, sql).is_err()); - } } diff --git a/src/parser/msck.rs b/src/parser/msck.rs new file mode 100644 index 000000000..62aa218b4 --- /dev/null +++ b/src/parser/msck.rs @@ -0,0 +1,30 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_msck(&mut self) -> Result { + let repair = self.parse_keyword(Keyword::REPAIR); + self.expect_keyword(Keyword::TABLE)?; + let table_name = self.parse_object_name(false)?; + let partition_action = self + .maybe_parse(|parser| { + let pa = match parser.parse_one_of_keywords(&[ + Keyword::ADD, + Keyword::DROP, + Keyword::SYNC, + ]) { + Some(Keyword::ADD) => Some(AddDropSync::ADD), + Some(Keyword::DROP) => Some(AddDropSync::DROP), + Some(Keyword::SYNC) => Some(AddDropSync::SYNC), + _ => None, + }; + parser.expect_keyword(Keyword::PARTITIONS)?; + Ok(pa) + })? + .unwrap_or_default(); + Ok(Statement::Msck { + repair, + table_name, + partition_action, + }) + } +} diff --git a/src/parser/notify.rs b/src/parser/notify.rs new file mode 100644 index 000000000..3b80f6712 --- /dev/null +++ b/src/parser/notify.rs @@ -0,0 +1,13 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_notify(&mut self) -> Result { + let channel = self.parse_identifier(false)?; + let payload = if self.consume_token(&Token::Comma) { + Some(self.parse_literal_string()?) 
+ } else { + None + }; + Ok(Statement::NOTIFY { channel, payload }) + } +} diff --git a/src/parser/optimize.rs b/src/parser/optimize.rs new file mode 100644 index 000000000..e61811db0 --- /dev/null +++ b/src/parser/optimize.rs @@ -0,0 +1,42 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// ```sql + /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] + /// ``` + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) + pub fn parse_optimize_table(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let name = self.parse_object_name(false)?; + let on_cluster = self.parse_optional_on_cluster()?; + + let partition = if self.parse_keyword(Keyword::PARTITION) { + if self.parse_keyword(Keyword::ID) { + Some(Partition::Identifier(self.parse_identifier(false)?)) + } else { + Some(Partition::Expr(self.parse_expr()?)) + } + } else { + None + }; + + let include_final = self.parse_keyword(Keyword::FINAL); + let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) { + if self.parse_keyword(Keyword::BY) { + Some(Deduplicate::ByExpression(self.parse_expr()?)) + } else { + Some(Deduplicate::All) + } + } else { + None + }; + + Ok(Statement::OptimizeTable { + name, + on_cluster, + partition, + include_final, + deduplicate, + }) + } +} diff --git a/src/parser/options.rs b/src/parser/options.rs new file mode 100644 index 000000000..acddbfaae --- /dev/null +++ b/src/parser/options.rs @@ -0,0 +1,128 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn maybe_parse_options( + &mut self, + keyword: Keyword, + ) -> Result>, ParserError> { + if let Token::Word(word) = self.peek_token().token { + if word.keyword == keyword { + return Ok(Some(self.parse_options(keyword)?)); + } + }; + Ok(None) + } + + pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { + if self.parse_keyword(keyword) { + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(Parser::parse_sql_option)?; + self.expect_token(&Token::RParen)?; + Ok(options) + } else { + Ok(vec![]) + } + } + + pub fn parse_option_clustered(&mut self) -> Result { + if self.parse_keywords(&[ + Keyword::CLUSTERED, + Keyword::COLUMNSTORE, + Keyword::INDEX, + Keyword::ORDER, + ]) { + Ok(SqlOption::Clustered( + TableOptionsClustered::ColumnstoreIndexOrder( + self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, + ), + )) + } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::COLUMNSTORE, Keyword::INDEX]) { + Ok(SqlOption::Clustered( + TableOptionsClustered::ColumnstoreIndex, + )) + } else if self.parse_keywords(&[Keyword::CLUSTERED, Keyword::INDEX]) { + self.expect_token(&Token::LParen)?; + + let columns = self.parse_comma_separated(|p| { + let name = p.parse_identifier(false)?; + let asc = p.parse_asc_desc(); + + Ok(ClusteredIndex { name, asc }) + })?; + + self.expect_token(&Token::RParen)?; + + Ok(SqlOption::Clustered(TableOptionsClustered::Index(columns))) + } else { + Err(ParserError::ParserError( + "invalid CLUSTERED sequence".to_string(), + )) + } + } + + pub fn parse_option_partition(&mut self) -> Result { + self.expect_keyword(Keyword::PARTITION)?; + self.expect_token(&Token::LParen)?; + let column_name = self.parse_identifier(false)?; + + self.expect_keyword(Keyword::RANGE)?; + let range_direction = if self.parse_keyword(Keyword::LEFT) { + Some(PartitionRangeDirection::Left) + } else if self.parse_keyword(Keyword::RIGHT) { + 
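
A sketch of an OPTIMIZE TABLE statement that hits every optional clause in the function above (the cluster and partition names are placeholders):

    use sqlparser::dialect::ClickHouseDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // ON CLUSTER, PARTITION ID, FINAL, and DEDUPLICATE BY are all optional.
        let sql = "OPTIMIZE TABLE t ON CLUSTER main PARTITION ID 'p0' FINAL DEDUPLICATE BY id";
        assert!(Parser::parse_sql(&ClickHouseDialect {}, sql).is_ok());
    }
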
Some(PartitionRangeDirection::Right) + } else { + None + }; + + self.expect_keywords(&[Keyword::FOR, Keyword::VALUES])?; + self.expect_token(&Token::LParen)?; + + let for_values = self.parse_comma_separated(Parser::parse_expr)?; + + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + + Ok(SqlOption::Partition { + column_name, + range_direction, + for_values, + }) + } + + pub fn parse_options_with_keywords( + &mut self, + keywords: &[Keyword], + ) -> Result, ParserError> { + if self.parse_keywords(keywords) { + self.expect_token(&Token::LParen)?; + let options = self.parse_comma_separated(Parser::parse_sql_option)?; + self.expect_token(&Token::RParen)?; + Ok(options) + } else { + Ok(vec![]) + } + } + + pub fn parse_sql_option(&mut self) -> Result { + let is_mssql = dialect_of!(self is MsSqlDialect|GenericDialect); + + match self.peek_token().token { + Token::Word(w) if w.keyword == Keyword::HEAP && is_mssql => { + Ok(SqlOption::Ident(self.parse_identifier(false)?)) + } + Token::Word(w) if w.keyword == Keyword::PARTITION && is_mssql => { + self.parse_option_partition() + } + Token::Word(w) if w.keyword == Keyword::CLUSTERED && is_mssql => { + self.parse_option_clustered() + } + _ => { + let name = self.parse_identifier(false)?; + self.expect_token(&Token::Eq)?; + let value = self.parse_expr()?; + + Ok(SqlOption::KeyValue { key: name, value }) + } + } + } +} diff --git a/src/parser/pragma.rs b/src/parser/pragma.rs new file mode 100644 index 000000000..91a807940 --- /dev/null +++ b/src/parser/pragma.rs @@ -0,0 +1,42 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] + pub fn parse_pragma(&mut self) -> Result { + let name = self.parse_object_name(false)?; + if self.consume_token(&Token::LParen) { + let value = self.parse_pragma_value()?; + self.expect_token(&Token::RParen)?; + Ok(Statement::Pragma { + name, + value: Some(value), + is_eq: false, + }) + } else if self.consume_token(&Token::Eq) { + Ok(Statement::Pragma { + name, + value: Some(self.parse_pragma_value()?), + is_eq: true, + }) + } else { + Ok(Statement::Pragma { + name, + value: None, + is_eq: false, + }) + } + } + + fn parse_pragma_value(&mut self) -> Result { + match self.parse_value()? { + v @ Value::SingleQuotedString(_) => Ok(v), + v @ Value::DoubleQuotedString(_) => Ok(v), + v @ Value::Number(_, _) => Ok(v), + v @ Value::Placeholder(_) => Ok(v), + _ => { + self.prev_token(); + self.expected("number or string or ? 
placeholder", self.peek_token()) + } + } + } +} diff --git a/src/parser/prepare.rs b/src/parser/prepare.rs new file mode 100644 index 000000000..795130781 --- /dev/null +++ b/src/parser/prepare.rs @@ -0,0 +1,21 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_prepare(&mut self) -> Result { + let name = self.parse_identifier(false)?; + + let mut data_types = vec![]; + if self.consume_token(&Token::LParen) { + data_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + } + + self.expect_keyword(Keyword::AS)?; + let statement = Box::new(self.parse_statement()?); + Ok(Statement::Prepare { + name, + data_types, + statement, + }) + } +} diff --git a/src/parser/release.rs b/src/parser/release.rs new file mode 100644 index 000000000..e3c5aa56b --- /dev/null +++ b/src/parser/release.rs @@ -0,0 +1,10 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_release(&mut self) -> Result { + let _ = self.parse_keyword(Keyword::SAVEPOINT); + let name = self.parse_identifier(false)?; + + Ok(Statement::ReleaseSavepoint { name }) + } +} diff --git a/src/parser/replace.rs b/src/parser/replace.rs new file mode 100644 index 000000000..6519b2b0a --- /dev/null +++ b/src/parser/replace.rs @@ -0,0 +1,22 @@ +use crate::parser::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + /// Parse an REPLACE statement + pub fn parse_replace(&mut self) -> Result { + if !dialect_of!(self is MySqlDialect | GenericDialect) { + return parser_err!( + "Unsupported statement REPLACE", + self.peek_token().span.start + ); + } + + let mut insert = self.parse_insert()?; + if let Statement::Insert(Insert { replace_into, .. }) = &mut insert { + *replace_into = true; + } + + Ok(insert) + } +} diff --git a/src/parser/revoke.rs b/src/parser/revoke.rs new file mode 100644 index 000000000..a0c2f753a --- /dev/null +++ b/src/parser/revoke.rs @@ -0,0 +1,32 @@ +use crate::parser::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + /// Parse a REVOKE statement + pub fn parse_revoke(&mut self) -> Result { + let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; + + self.expect_keyword(Keyword::FROM)?; + let grantees = self.parse_comma_separated(|p| p.parse_identifier(false))?; + + let granted_by = self + .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) + .then(|| self.parse_identifier(false).unwrap()); + + let loc = self.peek_token().span.start; + let cascade = self.parse_keyword(Keyword::CASCADE); + let restrict = self.parse_keyword(Keyword::RESTRICT); + if cascade && restrict { + return parser_err!("Cannot specify both CASCADE and RESTRICT in REVOKE", loc); + } + + Ok(Statement::Revoke { + privileges, + objects, + grantees, + granted_by, + cascade, + }) + } +} diff --git a/src/parser/rollback.rs b/src/parser/rollback.rs new file mode 100644 index 000000000..1d44762d0 --- /dev/null +++ b/src/parser/rollback.rs @@ -0,0 +1,32 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_rollback(&mut self) -> Result { + let chain = self.parse_commit_rollback_chain()?; + let savepoint = self.parse_rollback_savepoint()?; + + Ok(Statement::Rollback { chain, savepoint }) + } + + pub fn parse_commit_rollback_chain(&mut self) -> Result { + let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); + if self.parse_keyword(Keyword::AND) { + let chain = !self.parse_keyword(Keyword::NO); + self.expect_keyword(Keyword::CHAIN)?; + Ok(chain) + } else { + Ok(false) + } + } + + pub fn parse_rollback_savepoint(&mut self) -> Result, ParserError> 
{ + if self.parse_keyword(Keyword::TO) { + let _ = self.parse_keyword(Keyword::SAVEPOINT); + let savepoint = self.parse_identifier(false)?; + + Ok(Some(savepoint)) + } else { + Ok(None) + } + } +} diff --git a/src/parser/savepoint.rs b/src/parser/savepoint.rs new file mode 100644 index 000000000..a725e6caf --- /dev/null +++ b/src/parser/savepoint.rs @@ -0,0 +1,8 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_savepoint(&mut self) -> Result { + let name = self.parse_identifier(false)?; + Ok(Statement::Savepoint { name }) + } +} diff --git a/src/parser/select.rs b/src/parser/select.rs new file mode 100644 index 000000000..36614be8b --- /dev/null +++ b/src/parser/select.rs @@ -0,0 +1,2142 @@ +use super::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + /// Parse a query expression, i.e. a `SELECT` statement optionally + /// preceded with some `WITH` CTE declarations and optionally followed + /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't + /// expect the initial keyword to be already consumed + pub fn parse_query(&mut self) -> Result, ParserError> { + let _guard = self.recursion_counter.try_decrease()?; + let with = if let Some(with_token) = self.parse_keyword_token(Keyword::WITH) { + Some(With { + with_token: with_token.into(), + recursive: self.parse_keyword(Keyword::RECURSIVE), + cte_tables: self.parse_comma_separated(Parser::parse_cte)?, + }) + } else { + None + }; + if self.parse_keyword(Keyword::INSERT) { + Ok(Query { + with, + body: self.parse_insert_setexpr_boxed()?, + limit: None, + limit_by: vec![], + order_by: None, + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + } + .into()) + } else if self.parse_keyword(Keyword::UPDATE) { + Ok(Query { + with, + body: self.parse_update_setexpr_boxed()?, + limit: None, + limit_by: vec![], + order_by: None, + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + } + .into()) + } else { + let body = self.parse_query_body(self.dialect.prec_unknown())?; + + let order_by = self.parse_optional_order_by()?; + + let mut limit = None; + let mut offset = None; + + for _x in 0..2 { + if limit.is_none() && self.parse_keyword(Keyword::LIMIT) { + limit = self.parse_limit()? + } + + if offset.is_none() && self.parse_keyword(Keyword::OFFSET) { + offset = Some(self.parse_offset()?) + } + + if self.dialect.supports_limit_comma() + && limit.is_some() + && offset.is_none() + && self.consume_token(&Token::Comma) + { + // MySQL style LIMIT x,y => LIMIT y OFFSET x. + // Check for more details. + offset = Some(Offset { + value: limit.unwrap(), + rows: OffsetRows::None, + }); + limit = Some(self.parse_expr()?); + } + } + + let limit_by = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::BY) + { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let settings = self.parse_settings()?; + + let fetch = if self.parse_keyword(Keyword::FETCH) { + Some(self.parse_fetch()?) + } else { + None + }; + + let mut for_clause = None; + let mut locks = Vec::new(); + while self.parse_keyword(Keyword::FOR) { + if let Some(parsed_for_clause) = self.parse_for_clause()? 
{ + for_clause = Some(parsed_for_clause); + break; + } else { + locks.push(self.parse_lock()?); + } + } + let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keyword(Keyword::FORMAT) + { + if self.parse_keyword(Keyword::NULL) { + Some(FormatClause::Null) + } else { + let ident = self.parse_identifier(false)?; + Some(FormatClause::Identifier(ident)) + } + } else { + None + }; + + Ok(Query { + with, + body, + order_by, + limit, + limit_by, + offset, + fetch, + locks, + for_clause, + settings, + format_clause, + } + .into()) + } + } + + /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed + /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found. + pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { + let loc = self.peek_token().span.start; + let all = self.parse_keyword(Keyword::ALL); + let distinct = self.parse_keyword(Keyword::DISTINCT); + if !distinct { + return Ok(None); + } + if all { + return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc); + } + let on = self.parse_keyword(Keyword::ON); + if !on { + return Ok(Some(Distinct::Distinct)); + } + + self.expect_token(&Token::LParen)?; + let col_names = if self.consume_token(&Token::RParen) { + self.prev_token(); + Vec::new() + } else { + self.parse_comma_separated(Parser::parse_expr)? + }; + self.expect_token(&Token::RParen)?; + Ok(Some(Distinct::On(col_names))) + } + + /// Parse `CREATE TABLE x AS TABLE y` + pub fn parse_as_table(&mut self) -> Result { + let token1 = self.next_token(); + let token2 = self.next_token(); + let token3 = self.next_token(); + + let table_name; + let schema_name; + if token2 == Token::Period { + match token1.token { + Token::Word(w) => { + schema_name = w.value; + } + _ => { + return self.expected("Schema name", token1); + } + } + match token3.token { + Token::Word(w) => { + table_name = w.value; + } + _ => { + return self.expected("Table name", token3); + } + } + Ok(Table { + table_name: Some(table_name), + schema_name: Some(schema_name), + }) + } else { + match token1.token { + Token::Word(w) => { + table_name = w.value; + } + _ => { + return self.expected("Table name", token1); + } + } + Ok(Table { + table_name: Some(table_name), + schema_name: None, + }) + } + } + + /// Parse ASC or DESC, returns an Option with true if ASC, false of DESC or `None` if none of + /// them. + pub fn parse_asc_desc(&mut self) -> Option { + if self.parse_keyword(Keyword::ASC) { + Some(true) + } else if self.parse_keyword(Keyword::DESC) { + Some(false) + } else { + None + } + } + + pub fn parse_connect_by(&mut self) -> Result { + let (condition, relationships) = if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) { + let relationships = self.with_state(ParserState::ConnectBy, |parser| { + parser.parse_comma_separated(Parser::parse_expr) + })?; + self.expect_keywords(&[Keyword::START, Keyword::WITH])?; + let condition = self.parse_expr()?; + (condition, relationships) + } else { + self.expect_keywords(&[Keyword::START, Keyword::WITH])?; + let condition = self.parse_expr()?; + self.expect_keywords(&[Keyword::CONNECT, Keyword::BY])?; + let relationships = self.with_state(ParserState::ConnectBy, |parser| { + parser.parse_comma_separated(Parser::parse_expr) + })?; + (condition, relationships) + }; + Ok(ConnectBy { + condition, + relationships, + }) + } + + /// Parse a CTE (`alias [( col1, col2, ... 
)] AS (subquery)`) + pub fn parse_cte(&mut self) -> Result { + let name = self.parse_identifier(false)?; + + let mut cte = if self.parse_keyword(Keyword::AS) { + let mut is_materialized = None; + if dialect_of!(self is PostgreSqlDialect) { + if self.parse_keyword(Keyword::MATERIALIZED) { + is_materialized = Some(CteAsMaterialized::Materialized); + } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { + is_materialized = Some(CteAsMaterialized::NotMaterialized); + } + } + self.expect_token(&Token::LParen)?; + + let query = self.parse_query()?; + let closing_paren_token = self.expect_token(&Token::RParen)?; + + let alias = TableAlias { + name, + columns: vec![], + }; + Cte { + alias, + query, + from: None, + materialized: is_materialized, + closing_paren_token: closing_paren_token.into(), + } + } else { + let columns = self.parse_table_alias_column_defs()?; + self.expect_keyword(Keyword::AS)?; + let mut is_materialized = None; + if dialect_of!(self is PostgreSqlDialect) { + if self.parse_keyword(Keyword::MATERIALIZED) { + is_materialized = Some(CteAsMaterialized::Materialized); + } else if self.parse_keywords(&[Keyword::NOT, Keyword::MATERIALIZED]) { + is_materialized = Some(CteAsMaterialized::NotMaterialized); + } + } + self.expect_token(&Token::LParen)?; + + let query = self.parse_query()?; + let closing_paren_token = self.expect_token(&Token::RParen)?; + + let alias = TableAlias { name, columns }; + Cte { + alias, + query, + from: None, + materialized: is_materialized, + closing_paren_token: closing_paren_token.into(), + } + }; + if self.parse_keyword(Keyword::FROM) { + cte.from = Some(self.parse_identifier(false)?); + } + Ok(cte) + } + + pub fn parse_derived_table_factor( + &mut self, + lateral: IsLateral, + ) -> Result { + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Derived { + lateral: match lateral { + Lateral => true, + NotLateral => false, + }, + subquery, + alias, + }) + } + + /// Parses an expression with an optional alias + /// + /// Examples: + /// + /// ```sql + /// SUM(price) AS total_price + /// ``` + /// ```sql + /// SUM(price) + /// ``` + /// + /// Example + /// ``` + /// # use sqlparser::parser::{Parser, ParserError}; + /// # use sqlparser::dialect::GenericDialect; + /// # fn main() ->Result<(), ParserError> { + /// let sql = r#"SUM("a") as "b""#; + /// let mut parser = Parser::new(&GenericDialect).try_with_sql(sql)?; + /// let expr_with_alias = parser.parse_expr_with_alias()?; + /// assert_eq!(Some("b".to_string()), expr_with_alias.alias.map(|x|x.value)); + /// # Ok(()) + /// # } + pub fn parse_expr_with_alias(&mut self) -> Result { + let expr = self.parse_expr()?; + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier(false)?) 
+ } else { + None + }; + + Ok(ExprWithAlias { expr, alias }) + } + + /// Parse a FETCH clause + pub fn parse_fetch(&mut self) -> Result { + self.expect_one_of_keywords(&[Keyword::FIRST, Keyword::NEXT])?; + let (quantity, percent) = if self + .parse_one_of_keywords(&[Keyword::ROW, Keyword::ROWS]) + .is_some() + { + (None, false) + } else { + let quantity = Expr::Value(self.parse_value()?); + let percent = self.parse_keyword(Keyword::PERCENT); + self.expect_one_of_keywords(&[Keyword::ROW, Keyword::ROWS])?; + (Some(quantity), percent) + }; + let with_ties = if self.parse_keyword(Keyword::ONLY) { + false + } else if self.parse_keywords(&[Keyword::WITH, Keyword::TIES]) { + true + } else { + return self.expected("one of ONLY or WITH TIES", self.peek_token()); + }; + Ok(Fetch { + with_ties, + percent, + quantity, + }) + } + + pub fn parse_function_args(&mut self) -> Result { + let arg = if self.dialect.supports_named_fn_args_with_expr_name() { + self.maybe_parse(|p| { + let name = p.parse_expr()?; + let operator = p.parse_function_named_arg_operator()?; + let arg = p.parse_wildcard_expr()?.into(); + Ok(FunctionArg::ExprNamed { + name, + arg, + operator, + }) + })? + } else { + self.maybe_parse(|p| { + let name = p.parse_identifier(false)?; + let operator = p.parse_function_named_arg_operator()?; + let arg = p.parse_wildcard_expr()?.into(); + Ok(FunctionArg::Named { + name, + arg, + operator, + }) + })? + }; + if let Some(arg) = arg { + return Ok(arg); + } + Ok(FunctionArg::Unnamed(self.parse_wildcard_expr()?.into())) + } + + // Parse a INTERPOLATE expression (ClickHouse dialect) + pub fn parse_interpolation(&mut self) -> Result { + let column = self.parse_identifier(false)?; + let expr = if self.parse_keyword(Keyword::AS) { + Some(self.parse_expr()?) + } else { + None + }; + Ok(InterpolateExpr { column, expr }) + } + + // Parse a set of comma seperated INTERPOLATE expressions (ClickHouse dialect) + // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier + pub fn parse_interpolations(&mut self) -> Result, ParserError> { + if !self.parse_keyword(Keyword::INTERPOLATE) { + return Ok(None); + } + + if self.consume_token(&Token::LParen) { + let interpolations = + self.parse_comma_separated0(|p| p.parse_interpolation(), Token::RParen)?; + self.expect_token(&Token::RParen)?; + // INTERPOLATE () and INTERPOLATE ( ... ) variants + return Ok(Some(Interpolate { + exprs: Some(interpolations), + })); + } + + // INTERPOLATE + Ok(Some(Interpolate { exprs: None })) + } + + pub fn parse_join_constraint(&mut self, natural: bool) -> Result { + if natural { + Ok(JoinConstraint::Natural) + } else if self.parse_keyword(Keyword::ON) { + let constraint = self.parse_expr()?; + Ok(JoinConstraint::On(constraint)) + } else if self.parse_keyword(Keyword::USING) { + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + Ok(JoinConstraint::Using(columns)) + } else { + Ok(JoinConstraint::None) + //self.expected("ON, or USING after JOIN", self.peek_token()) + } + } + + /// Parses MySQL's JSON_TABLE column definition. 
+ /// For example: `id INT EXISTS PATH '$' DEFAULT '0' ON EMPTY ERROR ON ERROR` + pub fn parse_json_table_column_def(&mut self) -> Result { + if self.parse_keyword(Keyword::NESTED) { + let _has_path_keyword = self.parse_keyword(Keyword::PATH); + let path = self.parse_value()?; + self.expect_keyword(Keyword::COLUMNS)?; + let columns = self.parse_parenthesized(|p| { + p.parse_comma_separated(Self::parse_json_table_column_def) + })?; + return Ok(JsonTableColumn::Nested(JsonTableNestedColumn { + path, + columns, + })); + } + let name = self.parse_identifier(false)?; + if self.parse_keyword(Keyword::FOR) { + self.expect_keyword(Keyword::ORDINALITY)?; + return Ok(JsonTableColumn::ForOrdinality(name)); + } + let r#type = self.parse_data_type()?; + let exists = self.parse_keyword(Keyword::EXISTS); + self.expect_keyword(Keyword::PATH)?; + let path = self.parse_value()?; + let mut on_empty = None; + let mut on_error = None; + while let Some(error_handling) = self.parse_json_table_column_error_handling()? { + if self.parse_keyword(Keyword::EMPTY) { + on_empty = Some(error_handling); + } else { + self.expect_keyword(Keyword::ERROR)?; + on_error = Some(error_handling); + } + } + Ok(JsonTableColumn::Named(JsonTableNamedColumn { + name, + r#type, + path, + exists, + on_empty, + on_error, + })) + } + + /// Parse a LIMIT clause + pub fn parse_limit(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::ALL) { + Ok(None) + } else { + Ok(Some(self.parse_expr()?)) + } + } + + /// Parse a FOR UPDATE/FOR SHARE clause + pub fn parse_lock(&mut self) -> Result { + let lock_type = match self.expect_one_of_keywords(&[Keyword::UPDATE, Keyword::SHARE])? { + Keyword::UPDATE => LockType::Update, + Keyword::SHARE => LockType::Share, + _ => unreachable!(), + }; + let of = if self.parse_keyword(Keyword::OF) { + Some(self.parse_object_name(false)?) + } else { + None + }; + let nonblock = if self.parse_keyword(Keyword::NOWAIT) { + Some(NonBlock::Nowait) + } else if self.parse_keywords(&[Keyword::SKIP, Keyword::LOCKED]) { + Some(NonBlock::SkipLocked) + } else { + None + }; + Ok(LockClause { + lock_type, + of, + nonblock, + }) + } + + /// Parses MSSQL's `OPENJSON WITH` column definition. + /// + /// ```sql + /// colName type [ column_path ] [ AS JSON ] + /// ``` + /// + /// Reference: + pub fn parse_openjson_table_column_def(&mut self) -> Result { + let name = self.parse_identifier(false)?; + let r#type = self.parse_data_type()?; + let path = if let Token::SingleQuotedString(path) = self.peek_token().token { + self.next_token(); + Some(path) + } else { + None + }; + let as_json = self.parse_keyword(Keyword::AS); + if as_json { + self.expect_keyword(Keyword::JSON)?; + } + Ok(OpenJsonTableColumn { + name, + r#type, + path, + as_json, + }) + } + + pub fn parse_optional_order_by(&mut self) -> Result, ParserError> { + if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + let order_by_exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; + let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) { + self.parse_interpolations()? + } else { + None + }; + + Ok(Some(OrderBy { + exprs: order_by_exprs, + interpolate, + })) + } else { + Ok(None) + } + } + + /// Parse an [`Except`](ExceptSelectItem) information for wildcard select items. + /// + /// If it is not possible to parse it, will return an option. 
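As a quick illustration (not part of this patch): `parse_optional_select_item_except`, documented above, backs the wildcard `EXCEPT` modifier. A minimal sketch against the crate's public API, with a made-up table and column:

```rust
use sqlparser::dialect::BigQueryDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    // BigQuery-style wildcard EXCEPT: select everything except one column.
    let sql = "SELECT * EXCEPT (ssn) FROM employees";
    let statements = Parser::parse_sql(&BigQueryDialect {}, sql)?;
    // The projection is a SelectItem::Wildcard whose options carry opt_except.
    println!("{statements:?}");
    Ok(())
}
```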
+ pub fn parse_optional_select_item_except( + &mut self, + ) -> Result, ParserError> { + let opt_except = if self.parse_keyword(Keyword::EXCEPT) { + if self.peek_token().token == Token::LParen { + let idents = self.parse_parenthesized_column_list(Mandatory, false)?; + match &idents[..] { + [] => { + return self.expected( + "at least one column should be parsed by the expect clause", + self.peek_token(), + )?; + } + [first, idents @ ..] => Some(ExceptSelectItem { + first_element: first.clone(), + additional_elements: idents.to_vec(), + }), + } + } else { + // Clickhouse allows EXCEPT column_name + let ident = self.parse_identifier(false)?; + Some(ExceptSelectItem { + first_element: ident, + additional_elements: vec![], + }) + } + } else { + None + }; + + Ok(opt_except) + } + + /// Parse an [`Exclude`](ExcludeSelectItem) information for wildcard select items. + /// + /// If it is not possible to parse it, will return an option. + pub fn parse_optional_select_item_exclude( + &mut self, + ) -> Result, ParserError> { + let opt_exclude = if self.parse_keyword(Keyword::EXCLUDE) { + if self.consume_token(&Token::LParen) { + let columns = + self.parse_comma_separated(|parser| parser.parse_identifier(false))?; + self.expect_token(&Token::RParen)?; + Some(ExcludeSelectItem::Multiple(columns)) + } else { + let column = self.parse_identifier(false)?; + Some(ExcludeSelectItem::Single(column)) + } + } else { + None + }; + + Ok(opt_exclude) + } + + /// Parse an [`Ilike`](IlikeSelectItem) information for wildcard select items. + /// + /// If it is not possible to parse it, will return an option. + pub fn parse_optional_select_item_ilike( + &mut self, + ) -> Result, ParserError> { + let opt_ilike = if self.parse_keyword(Keyword::ILIKE) { + let next_token = self.next_token(); + let pattern = match next_token.token { + Token::SingleQuotedString(s) => s, + _ => return self.expected("ilike pattern", next_token), + }; + Some(IlikeSelectItem { pattern }) + } else { + None + }; + Ok(opt_ilike) + } + + /// Parse a [`Rename`](RenameSelectItem) information for wildcard select items. + pub fn parse_optional_select_item_rename( + &mut self, + ) -> Result, ParserError> { + let opt_rename = if self.parse_keyword(Keyword::RENAME) { + if self.consume_token(&Token::LParen) { + let idents = + self.parse_comma_separated(|parser| parser.parse_identifier_with_alias())?; + self.expect_token(&Token::RParen)?; + Some(RenameSelectItem::Multiple(idents)) + } else { + let ident = self.parse_identifier_with_alias()?; + Some(RenameSelectItem::Single(ident)) + } + } else { + None + }; + + Ok(opt_rename) + } + + /// Parse a [`Replace`](ReplaceSelectItem) information for wildcard select items. + pub fn parse_optional_select_item_replace( + &mut self, + ) -> Result, ParserError> { + let opt_replace = if self.parse_keyword(Keyword::REPLACE) { + if self.consume_token(&Token::LParen) { + let items = self.parse_comma_separated(|parser| { + Ok(Box::new(parser.parse_replace_elements()?)) + })?; + self.expect_token(&Token::RParen)?; + Some(ReplaceSelectItem { items }) + } else { + let tok = self.next_token(); + return self.expected("( after REPLACE but", tok); + } + } else { + None + }; + + Ok(opt_replace) + } + + /// Parse `AS identifier` when the AS is describing a table-valued object, + /// like in `... FROM generate_series(1, 10) AS t (col)`. In this case + /// the alias is allowed to optionally name the columns in the table, in + /// addition to the table itself. 
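A short sketch (not part of the patch) of the table-valued alias form the doc comment above describes, with a hypothetical output column name `n`:

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    // The alias names both the table (`t`) and its single output column (`n`).
    let sql = "SELECT * FROM generate_series(1, 10) AS t (n)";
    let statements = Parser::parse_sql(&PostgreSqlDialect {}, sql)?;
    println!("{statements:?}");
    Ok(())
}
```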
+ pub fn parse_optional_table_alias( + &mut self, + reserved_kwds: &[Keyword], + ) -> Result, ParserError> { + match self.parse_optional_alias(reserved_kwds)? { + Some(name) => { + let columns = self.parse_table_alias_column_defs()?; + Ok(Some(TableAlias { name, columns })) + } + None => Ok(None), + } + } + + /// Parse an expression, optionally followed by ASC or DESC (used in ORDER BY) + pub fn parse_order_by_expr(&mut self) -> Result { + let expr = self.parse_expr()?; + + let asc = self.parse_asc_desc(); + + let nulls_first = if self.parse_keywords(&[Keyword::NULLS, Keyword::FIRST]) { + Some(true) + } else if self.parse_keywords(&[Keyword::NULLS, Keyword::LAST]) { + Some(false) + } else { + None + }; + + let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.parse_keywords(&[Keyword::WITH, Keyword::FILL]) + { + Some(self.parse_with_fill()?) + } else { + None + }; + + Ok(OrderByExpr { + expr, + asc, + nulls_first, + with_fill, + }) + } + + /// Parse a comma-separated list of 1+ SelectItem + pub fn parse_projection(&mut self) -> Result, ParserError> { + // BigQuery and Snowflake allow trailing commas, but only in project lists + // e.g. `SELECT 1, 2, FROM t` + // https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#trailing_commas + // https://docs.snowflake.com/en/release-notes/2024/8_11#select-supports-trailing-commas + + let trailing_commas = + self.options.trailing_commas | self.dialect.supports_projection_trailing_commas(); + + self.parse_comma_separated_with_trailing_commas(|p| p.parse_select_item(), trailing_commas) + } + + pub fn parse_replace_elements(&mut self) -> Result { + let expr = self.parse_expr()?; + let as_keyword = self.parse_keyword(Keyword::AS); + let ident = self.parse_identifier(false)?; + Ok(ReplaceSelectElement { + expr, + column_name: ident, + as_keyword, + }) + } + + /// Parse a comma-delimited list of projections after SELECT + pub fn parse_select_item(&mut self) -> Result { + match self.parse_wildcard_expr()? { + Expr::QualifiedWildcard(prefix, token) => Ok(SelectItem::QualifiedWildcard( + prefix, + self.parse_wildcard_additional_options(token.0)?, + )), + Expr::Wildcard(token) => Ok(SelectItem::Wildcard( + self.parse_wildcard_additional_options(token.0)?, + )), + Expr::Identifier(v) if v.value.to_lowercase() == "from" && v.quote_style.is_none() => { + parser_err!( + format!("Expected an expression, found: {}", v), + self.peek_token().span.start + ) + } + Expr::BinaryOp { + left, + op: BinaryOperator::Eq, + right, + } if self.dialect.supports_eq_alias_assignment() + && matches!(left.as_ref(), Expr::Identifier(_)) => + { + let Expr::Identifier(alias) = *left else { + return parser_err!( + "BUG: expected identifier expression as alias", + self.peek_token().span.start + ); + }; + Ok(SelectItem::ExprWithAlias { + expr: *right, + alias, + }) + } + expr => self + .parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) + .map(|alias| match alias { + Some(alias) => SelectItem::ExprWithAlias { expr, alias }, + None => SelectItem::UnnamedExpr(expr), + }), + } + } + + pub fn parse_table_and_joins(&mut self) -> Result { + let relation = self.parse_table_factor()?; + // Note that for keywords to be properly handled here, they need to be + // added to `RESERVED_FOR_TABLE_ALIAS`, otherwise they may be parsed as + // a table alias. 
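The note about `RESERVED_FOR_TABLE_ALIAS` is easy to trip over, so here is a rough sketch of the behavior it describes (not part of the patch; `t`, `u`, and `x` are made-up names):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    // `x` is not a reserved keyword, so it is consumed as the alias of `t`;
    // JOIN is reserved for table aliases, so the join list still parses.
    let sql = "SELECT * FROM t x JOIN u ON t.id = u.id";
    let statements = Parser::parse_sql(&GenericDialect {}, sql)?;
    println!("{statements:?}");
    Ok(())
}
```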
+ let mut joins = vec![]; + loop { + let global = self.parse_keyword(Keyword::GLOBAL); + let join = if self.parse_keyword(Keyword::CROSS) { + let join_operator = if self.parse_keyword(Keyword::JOIN) { + JoinOperator::CrossJoin + } else if self.parse_keyword(Keyword::APPLY) { + // MSSQL extension, similar to CROSS JOIN LATERAL + JoinOperator::CrossApply + } else { + return self.expected("JOIN or APPLY after CROSS", self.peek_token()); + }; + Join { + relation: self.parse_table_factor()?, + global, + join_operator, + } + } else if self.parse_keyword(Keyword::OUTER) { + // MSSQL extension, similar to LEFT JOIN LATERAL .. ON 1=1 + self.expect_keyword(Keyword::APPLY)?; + Join { + relation: self.parse_table_factor()?, + global, + join_operator: JoinOperator::OuterApply, + } + } else if self.parse_keyword(Keyword::ASOF) { + self.expect_keyword(Keyword::JOIN)?; + let relation = self.parse_table_factor()?; + self.expect_keyword(Keyword::MATCH_CONDITION)?; + let match_condition = self.parse_parenthesized(Self::parse_expr)?; + Join { + relation, + global, + join_operator: JoinOperator::AsOf { + match_condition, + constraint: self.parse_join_constraint(false)?, + }, + } + } else { + let natural = self.parse_keyword(Keyword::NATURAL); + let peek_keyword = if let Token::Word(w) = self.peek_token().token { + w.keyword + } else { + Keyword::NoKeyword + }; + + let join_operator_type = match peek_keyword { + Keyword::INNER | Keyword::JOIN => { + let _ = self.parse_keyword(Keyword::INNER); // [ INNER ] + self.expect_keyword(Keyword::JOIN)?; + JoinOperator::Inner + } + kw @ Keyword::LEFT | kw @ Keyword::RIGHT => { + let _ = self.next_token(); // consume LEFT/RIGHT + let is_left = kw == Keyword::LEFT; + let join_type = self.parse_one_of_keywords(&[ + Keyword::OUTER, + Keyword::SEMI, + Keyword::ANTI, + Keyword::JOIN, + ]); + match join_type { + Some(Keyword::OUTER) => { + self.expect_keyword(Keyword::JOIN)?; + if is_left { + JoinOperator::LeftOuter + } else { + JoinOperator::RightOuter + } + } + Some(Keyword::SEMI) => { + self.expect_keyword(Keyword::JOIN)?; + if is_left { + JoinOperator::LeftSemi + } else { + JoinOperator::RightSemi + } + } + Some(Keyword::ANTI) => { + self.expect_keyword(Keyword::JOIN)?; + if is_left { + JoinOperator::LeftAnti + } else { + JoinOperator::RightAnti + } + } + Some(Keyword::JOIN) => { + if is_left { + JoinOperator::LeftOuter + } else { + JoinOperator::RightOuter + } + } + _ => { + return Err(ParserError::ParserError(format!( + "expected OUTER, SEMI, ANTI or JOIN after {kw:?}" + ))) + } + } + } + Keyword::ANTI => { + let _ = self.next_token(); // consume ANTI + self.expect_keyword(Keyword::JOIN)?; + JoinOperator::Anti + } + Keyword::SEMI => { + let _ = self.next_token(); // consume SEMI + self.expect_keyword(Keyword::JOIN)?; + JoinOperator::Semi + } + Keyword::FULL => { + let _ = self.next_token(); // consume FULL + let _ = self.parse_keyword(Keyword::OUTER); // [ OUTER ] + self.expect_keyword(Keyword::JOIN)?; + JoinOperator::FullOuter + } + Keyword::OUTER => { + return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); + } + _ if natural => { + return self.expected("a join type after NATURAL", self.peek_token()); + } + _ => break, + }; + let relation = self.parse_table_factor()?; + let join_constraint = self.parse_join_constraint(natural)?; + Join { + relation, + global, + join_operator: join_operator_type(join_constraint), + } + }; + joins.push(join); + } + Ok(TableWithJoins { relation, joins }) + } + + /// Parse a given table version specifier. 
+ /// + /// For now it only supports timestamp versioning for BigQuery and MSSQL dialects. + pub fn parse_table_version(&mut self) -> Result, ParserError> { + if dialect_of!(self is BigQueryDialect | MsSqlDialect) + && self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) + { + let expr = self.parse_expr()?; + Ok(Some(TableVersion::ForSystemTimeAsOf(expr))) + } else { + Ok(None) + } + } + + /// Parse an [`WildcardAdditionalOptions`] information for wildcard select items. + /// + /// If it is not possible to parse it, will return an option. + pub fn parse_wildcard_additional_options( + &mut self, + wildcard_token: TokenWithSpan, + ) -> Result { + let opt_ilike = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + self.parse_optional_select_item_ilike()? + } else { + None + }; + let opt_exclude = if opt_ilike.is_none() + && dialect_of!(self is GenericDialect | DuckDbDialect | SnowflakeDialect) + { + self.parse_optional_select_item_exclude()? + } else { + None + }; + let opt_except = if self.dialect.supports_select_wildcard_except() { + self.parse_optional_select_item_except()? + } else { + None + }; + let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) + { + self.parse_optional_select_item_replace()? + } else { + None + }; + let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + self.parse_optional_select_item_rename()? + } else { + None + }; + + Ok(WildcardAdditionalOptions { + wildcard_token: wildcard_token.into(), + opt_ilike, + opt_exclude, + opt_except, + opt_rename, + opt_replace, + }) + } + + // Parse a WITH FILL clause (ClickHouse dialect) + // that follow the WITH FILL keywords in a ORDER BY clause + pub fn parse_with_fill(&mut self) -> Result { + let from = if self.parse_keyword(Keyword::FROM) { + Some(self.parse_expr()?) + } else { + None + }; + + let to = if self.parse_keyword(Keyword::TO) { + Some(self.parse_expr()?) + } else { + None + }; + + let step = if self.parse_keyword(Keyword::STEP) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(WithFill { from, to, step }) + } + + fn parse_settings(&mut self) -> Result>, ParserError> { + let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::SETTINGS) + { + let key_values = self.parse_comma_separated(|p| { + let key = p.parse_identifier(false)?; + p.expect_token(&Token::Eq)?; + let value = p.parse_value()?; + Ok(Setting { key, value }) + })?; + Some(key_values) + } else { + None + }; + Ok(settings) + } + + /// Parse an UPDATE statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + fn parse_update_setexpr_boxed(&mut self) -> Result, ParserError> { + Ok(Box::new(SetExpr::Update(self.parse_update()?))) + } + + /// Parse a "query body", which is an expression with roughly the + /// following grammar: + /// ```sql + /// query_body ::= restricted_select | '(' subquery ')' | set_operation + /// restricted_select ::= 'SELECT' [expr_list] [ from ] [ where ] [ groupby_having ] + /// subquery ::= query_body [ order_by_limit ] + /// set_operation ::= query_body { 'UNION' | 'EXCEPT' | 'INTERSECT' } [ 'ALL' ] query_body + /// ``` + pub fn parse_query_body(&mut self, precedence: u8) -> Result, ParserError> { + // We parse the expression using a Pratt parser, as in `parse_expr()`. 
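A sketch (not part of the patch) of the grouping this Pratt loop produces: in `parse_remaining_set_exprs` below, `UNION`/`EXCEPT` bind at precedence 10 and `INTERSECT` at 20, so `INTERSECT` groups tighter:

```rust
use sqlparser::ast::{SetExpr, SetOperator, Statement};
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    let sql = "SELECT 1 UNION SELECT 2 INTERSECT SELECT 3";
    let statements = Parser::parse_sql(&GenericDialect {}, sql)?;
    if let Statement::Query(query) = &statements[0] {
        if let SetExpr::SetOperation { op, right, .. } = query.body.as_ref() {
            // Parsed as `1 UNION (2 INTERSECT 3)`.
            assert_eq!(*op, SetOperator::Union);
            assert!(matches!(
                right.as_ref(),
                SetExpr::SetOperation { op: SetOperator::Intersect, .. }
            ));
        }
    }
    Ok(())
}
```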
+ // Start by parsing a restricted SELECT or a `(subquery)`: + let expr = if self.peek_keyword(Keyword::SELECT) { + SetExpr::Select(self.parse_select().map(Box::new)?) + } else if self.consume_token(&Token::LParen) { + // CTEs are not allowed here, but the parser currently accepts them + let subquery = self.parse_query()?; + self.expect_token(&Token::RParen)?; + SetExpr::Query(subquery) + } else if self.parse_keyword(Keyword::VALUES) { + let is_mysql = dialect_of!(self is MySqlDialect); + SetExpr::Values(self.parse_values(is_mysql)?) + } else if self.parse_keyword(Keyword::TABLE) { + SetExpr::Table(Box::new(self.parse_as_table()?)) + } else { + return self.expected( + "SELECT, VALUES, or a subquery in the query body", + self.peek_token(), + ); + }; + + self.parse_remaining_set_exprs(expr, precedence) + } + + /// Parse any extra set expressions that may be present in a query body + /// + /// (this is its own function to reduce required stack size in debug builds) + fn parse_remaining_set_exprs( + &mut self, + mut expr: SetExpr, + precedence: u8, + ) -> Result, ParserError> { + loop { + // The query can be optionally followed by a set operator: + let op = self.parse_set_operator(&self.peek_token().token); + let next_precedence = match op { + // UNION and EXCEPT have the same binding power and evaluate left-to-right + Some(SetOperator::Union) | Some(SetOperator::Except) => 10, + // INTERSECT has higher precedence than UNION/EXCEPT + Some(SetOperator::Intersect) => 20, + // Unexpected token or EOF => stop parsing the query body + None => break, + }; + if precedence >= next_precedence { + break; + } + self.next_token(); // skip past the set operator + let set_quantifier = self.parse_set_quantifier(&op); + expr = SetExpr::SetOperation { + left: Box::new(expr), + op: op.unwrap(), + set_quantifier, + right: self.parse_query_body(next_precedence)?, + }; + } + + Ok(expr.into()) + } + + pub fn parse_set_operator(&mut self, token: &Token) -> Option { + match token { + Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), + Token::Word(w) if w.keyword == Keyword::EXCEPT => Some(SetOperator::Except), + Token::Word(w) if w.keyword == Keyword::INTERSECT => Some(SetOperator::Intersect), + _ => None, + } + } + + pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { + match op { + Some(SetOperator::Except | SetOperator::Intersect | SetOperator::Union) => { + if self.parse_keywords(&[Keyword::DISTINCT, Keyword::BY, Keyword::NAME]) { + SetQuantifier::DistinctByName + } else if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + SetQuantifier::ByName + } else if self.parse_keyword(Keyword::ALL) { + if self.parse_keywords(&[Keyword::BY, Keyword::NAME]) { + SetQuantifier::AllByName + } else { + SetQuantifier::All + } + } else if self.parse_keyword(Keyword::DISTINCT) { + SetQuantifier::Distinct + } else { + SetQuantifier::None + } + } + _ => SetQuantifier::None, + } + } + + /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`) + pub fn parse_select(&mut self) -> Result { + let select_token = self.expect_keyword(Keyword::SELECT)?; + let value_table_mode = + if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) { + if self.parse_keyword(Keyword::VALUE) { + Some(ValueTableMode::AsValue) + } else if self.parse_keyword(Keyword::STRUCT) { + Some(ValueTableMode::AsStruct) + } else { + self.expected("VALUE or STRUCT", self.peek_token())? 
+ } + } else { + None + }; + + let mut top_before_distinct = false; + let mut top = None; + if self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { + top = Some(self.parse_top()?); + top_before_distinct = true; + } + let distinct = self.parse_all_or_distinct()?; + if !self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { + top = Some(self.parse_top()?); + } + + let projection = self.parse_projection()?; + + let into = if self.parse_keyword(Keyword::INTO) { + let temporary = self + .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) + .is_some(); + let unlogged = self.parse_keyword(Keyword::UNLOGGED); + let table = self.parse_keyword(Keyword::TABLE); + let name = self.parse_object_name(false)?; + Some(SelectInto { + temporary, + unlogged, + table, + name, + }) + } else { + None + }; + + // Note that for keywords to be properly handled here, they need to be + // added to `RESERVED_FOR_COLUMN_ALIAS` / `RESERVED_FOR_TABLE_ALIAS`, + // otherwise they may be parsed as an alias as part of the `projection` + // or `from`. + + let from = if self.parse_keyword(Keyword::FROM) { + self.parse_comma_separated(Parser::parse_table_and_joins)? + } else { + vec![] + }; + + let mut lateral_views = vec![]; + loop { + if self.parse_keywords(&[Keyword::LATERAL, Keyword::VIEW]) { + let outer = self.parse_keyword(Keyword::OUTER); + let lateral_view = self.parse_expr()?; + let lateral_view_name = self.parse_object_name(false)?; + let lateral_col_alias = self + .parse_comma_separated(|parser| { + parser.parse_optional_alias(&[ + Keyword::WHERE, + Keyword::GROUP, + Keyword::CLUSTER, + Keyword::HAVING, + Keyword::LATERAL, + ]) // This couldn't possibly be a bad idea + })? + .into_iter() + .flatten() + .collect(); + + lateral_views.push(LateralView { + lateral_view, + lateral_view_name, + lateral_col_alias, + outer, + }); + } else { + break; + } + } + + let prewhere = if dialect_of!(self is ClickHouseDialect|GenericDialect) + && self.parse_keyword(Keyword::PREWHERE) + { + Some(self.parse_expr()?) + } else { + None + }; + + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + + let group_by = self + .parse_optional_group_by()? + .unwrap_or_else(|| GroupByExpr::Expressions(vec![], vec![])); + + let cluster_by = if self.parse_keywords(&[Keyword::CLUSTER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let distribute_by = if self.parse_keywords(&[Keyword::DISTRIBUTE, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let sort_by = if self.parse_keywords(&[Keyword::SORT, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let having = if self.parse_keyword(Keyword::HAVING) { + Some(self.parse_expr()?) + } else { + None + }; + + // Accept QUALIFY and WINDOW in any order and flag accordingly. 
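A sketch (not part of the patch) of one of the two clause orders this block accepts; `window_before_qualify` records which came first so the statement round-trips in its original shape:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    // WINDOW first, then QUALIFY (the reverse order also parses).
    let sql = "SELECT c FROM t WINDOW w AS (PARTITION BY c) \
               QUALIFY ROW_NUMBER() OVER w = 1";
    let statements = Parser::parse_sql(&GenericDialect {}, sql)?;
    println!("{}", statements[0]); // prints WINDOW before QUALIFY
    Ok(())
}
```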
+ let (named_windows, qualify, window_before_qualify) = if self.parse_keyword(Keyword::WINDOW) + { + let named_windows = self.parse_comma_separated(Parser::parse_named_window)?; + if self.parse_keyword(Keyword::QUALIFY) { + (named_windows, Some(self.parse_expr()?), true) + } else { + (named_windows, None, true) + } + } else if self.parse_keyword(Keyword::QUALIFY) { + let qualify = Some(self.parse_expr()?); + if self.parse_keyword(Keyword::WINDOW) { + ( + self.parse_comma_separated(Parser::parse_named_window)?, + qualify, + false, + ) + } else { + (Default::default(), qualify, false) + } + } else { + Default::default() + }; + + let connect_by = if self.dialect.supports_connect_by() + && self + .parse_one_of_keywords(&[Keyword::START, Keyword::CONNECT]) + .is_some() + { + self.prev_token(); + Some(self.parse_connect_by()?) + } else { + None + }; + + Ok(Select { + select_token: AttachedToken(select_token), + distinct, + top, + top_before_distinct, + projection, + into, + from, + lateral_views, + prewhere, + selection, + group_by, + cluster_by, + distribute_by, + sort_by, + having, + named_window: named_windows, + window_before_qualify, + qualify, + value_table_mode, + connect_by, + }) + } + + /// Parse an OFFSET clause + pub fn parse_offset(&mut self) -> Result { + let value = self.parse_expr()?; + let rows = if self.parse_keyword(Keyword::ROW) { + OffsetRows::Row + } else if self.parse_keyword(Keyword::ROWS) { + OffsetRows::Rows + } else { + OffsetRows::None + }; + Ok(Offset { value, rows }) + } + + pub fn parse_pivot_table_factor( + &mut self, + table: TableFactor, + ) -> Result { + self.expect_token(&Token::LParen)?; + let aggregate_functions = self.parse_comma_separated(Self::parse_aliased_function_call)?; + self.expect_keyword(Keyword::FOR)?; + let value_column = self.parse_object_name(false)?.0; + self.expect_keyword(Keyword::IN)?; + + self.expect_token(&Token::LParen)?; + let value_source = if self.parse_keyword(Keyword::ANY) { + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + PivotValueSource::Any(order_by) + } else if self.peek_sub_query() { + PivotValueSource::Subquery(self.parse_query()?) + } else { + PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?) + }; + self.expect_token(&Token::RParen)?; + + let default_on_null = + if self.parse_keywords(&[Keyword::DEFAULT, Keyword::ON, Keyword::NULL]) { + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(expr) + } else { + None + }; + + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Pivot { + table: Box::new(table), + aggregate_functions, + value_column, + value_source, + default_on_null, + alias, + }) + } + + /// Parse a TOP clause, MSSQL equivalent of LIMIT, + /// that follows after `SELECT [DISTINCT]`. 
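For reference (not part of the patch), the shapes `parse_top` accepts, sketched against the MSSQL dialect with made-up table and column names:

```rust
use sqlparser::dialect::MsSqlDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    for sql in [
        "SELECT TOP 10 name FROM people",          // bare constant quantity
        "SELECT TOP (5) PERCENT name FROM people", // parenthesized expr + PERCENT
        "SELECT TOP 3 WITH TIES name FROM people ORDER BY score",
    ] {
        let statements = Parser::parse_sql(&MsSqlDialect {}, sql)?;
        println!("{}", statements[0]);
    }
    Ok(())
}
```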
+ pub fn parse_top(&mut self) -> Result { + let quantity = if self.consume_token(&Token::LParen) { + let quantity = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + Some(TopQuantity::Expr(quantity)) + } else { + let next_token = self.next_token(); + let quantity = match next_token.token { + Token::Number(s, _) => Self::parse::(s, next_token.span.start)?, + _ => self.expected("literal int", next_token)?, + }; + Some(TopQuantity::Constant(quantity)) + }; + + let percent = self.parse_keyword(Keyword::PERCENT); + + let with_ties = self.parse_keywords(&[Keyword::WITH, Keyword::TIES]); + + Ok(Top { + with_ties, + percent, + quantity, + }) + } + + /// A table name or a parenthesized subquery, followed by optional `[AS] alias` + pub fn parse_table_factor(&mut self) -> Result { + if self.parse_keyword(Keyword::LATERAL) { + // LATERAL must always be followed by a subquery or table function. + if self.consume_token(&Token::LParen) { + self.parse_derived_table_factor(Lateral) + } else { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::LParen)?; + let args = self.parse_optional_args()?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Function { + lateral: true, + name, + args, + alias, + }) + } + } else if self.parse_keyword(Keyword::TABLE) { + // parse table function (SELECT * FROM TABLE () [ AS ]) + self.expect_token(&Token::LParen)?; + let expr = self.parse_expr()?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::TableFunction { expr, alias }) + } else if self.consume_token(&Token::LParen) { + // A left paren introduces either a derived table (i.e., a subquery) + // or a nested join. It's nearly impossible to determine ahead of + // time which it is... so we just try to parse both. + // + // Here's an example that demonstrates the complexity: + // /-------------------------------------------------------\ + // | /-----------------------------------\ | + // SELECT * FROM ( ( ( (SELECT 1) UNION (SELECT 2) ) AS t1 NATURAL JOIN t2 ) ) + // ^ ^ ^ ^ + // | | | | + // | | | | + // | | | (4) belongs to a SetExpr::Query inside the subquery + // | | (3) starts a derived table (subquery) + // | (2) starts a nested join + // (1) an additional set of parens around a nested join + // + + // If the recently consumed '(' starts a derived table, the call to + // `parse_derived_table_factor` below will return success after parsing the + // subquery, followed by the closing ')', and the alias of the derived table. + // In the example above this is case (3). + if let Some(mut table) = + self.maybe_parse(|parser| parser.parse_derived_table_factor(NotLateral))? + { + while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) + { + table = match kw { + Keyword::PIVOT => self.parse_pivot_table_factor(table)?, + Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, + _ => unreachable!(), + } + } + return Ok(table); + } + + // A parsing error from `parse_derived_table_factor` indicates that the '(' we've + // recently consumed does not start a derived table (cases 1, 2, or 4). + // `maybe_parse` will ignore such an error and rewind to be after the opening '('. + + // Inside the parentheses we expect to find an (A) table factor + // followed by some joins or (B) another level of nesting. 
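The tricky example from the diagram in the comment above does parse; a quick sketch (not part of the patch):

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    // Case (3) nested in case (2) nested in case (1) from the diagram.
    let sql =
        "SELECT * FROM ( ( ( (SELECT 1) UNION (SELECT 2) ) AS t1 NATURAL JOIN t2 ) )";
    let statements = Parser::parse_sql(&GenericDialect {}, sql)?;
    println!("{}", statements[0]);
    Ok(())
}
```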
+ let mut table_and_joins = self.parse_table_and_joins()?; + + #[allow(clippy::if_same_then_else)] + if !table_and_joins.joins.is_empty() { + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::NestedJoin { + table_with_joins: Box::new(table_and_joins), + alias, + }) // (A) + } else if let TableFactor::NestedJoin { + table_with_joins: _, + alias: _, + } = &table_and_joins.relation + { + // (B): `table_and_joins` (what we found inside the parentheses) + // is a nested join `(foo JOIN bar)`, not followed by other joins. + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::NestedJoin { + table_with_joins: Box::new(table_and_joins), + alias, + }) + } else if dialect_of!(self is SnowflakeDialect | GenericDialect) { + // Dialect-specific behavior: Snowflake diverges from the + // standard and from most of the other implementations by + // allowing extra parentheses not only around a join (B), but + // around lone table names (e.g. `FROM (mytable [AS alias])`) + // and around derived tables (e.g. `FROM ((SELECT ...) + // [AS alias])`) as well. + self.expect_token(&Token::RParen)?; + + if let Some(outer_alias) = + self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)? + { + // Snowflake also allows specifying an alias *after* parens + // e.g. `FROM (mytable) AS alias` + match &mut table_and_joins.relation { + TableFactor::Derived { alias, .. } + | TableFactor::Table { alias, .. } + | TableFactor::Function { alias, .. } + | TableFactor::UNNEST { alias, .. } + | TableFactor::JsonTable { alias, .. } + | TableFactor::OpenJsonTable { alias, .. } + | TableFactor::TableFunction { alias, .. } + | TableFactor::Pivot { alias, .. } + | TableFactor::Unpivot { alias, .. } + | TableFactor::MatchRecognize { alias, .. } + | TableFactor::NestedJoin { alias, .. } => { + // but not `FROM (mytable AS alias1) AS alias2`. + if let Some(inner_alias) = alias { + return Err(ParserError::ParserError(format!( + "duplicate alias {inner_alias}" + ))); + } + // Act as if the alias was specified normally next + // to the table name: `(mytable) AS alias` -> + // `(mytable AS alias)` + alias.replace(outer_alias); + } + }; + } + // Do not store the extra set of parens in the AST + Ok(table_and_joins.relation) + } else { + // The SQL spec prohibits derived tables and bare tables from + // appearing alone in parentheses (e.g. `FROM (mytable)`) + self.expected("joined table", self.peek_token()) + } + } else if dialect_of!(self is SnowflakeDialect | DatabricksDialect | GenericDialect) + && matches!( + self.peek_tokens(), + [ + Token::Word(Word { + keyword: Keyword::VALUES, + .. + }), + Token::LParen + ] + ) + { + self.expect_keyword(Keyword::VALUES)?; + + // Snowflake and Databricks allow syntax like below: + // SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2) + // where there are no parentheses around the VALUES clause. 
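A sketch (not part of the patch) of the bare `VALUES` table factor the comment describes, run through the Snowflake dialect:

```rust
use sqlparser::dialect::SnowflakeDialect;
use sqlparser::parser::{Parser, ParserError};

fn main() -> Result<(), ParserError> {
    // No parentheses around VALUES; the alias also names the two columns.
    let sql = "SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2)";
    let statements = Parser::parse_sql(&SnowflakeDialect {}, sql)?;
    println!("{}", statements[0]);
    Ok(())
}
```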
+ let values = SetExpr::Values(self.parse_values(false)?); + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Derived { + lateral: false, + subquery: Box::new(Query { + with: None, + body: Box::new(values), + order_by: None, + limit: None, + limit_by: vec![], + offset: None, + fetch: None, + locks: vec![], + for_clause: None, + settings: None, + format_clause: None, + }), + alias, + }) + } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) + && self.parse_keyword(Keyword::UNNEST) + { + self.expect_token(&Token::LParen)?; + let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { + Ok(Some(alias)) => Some(alias), + Ok(None) => None, + Err(e) => return Err(e), + }; + + let with_offset = match self.expect_keywords(&[Keyword::WITH, Keyword::OFFSET]) { + Ok(()) => true, + Err(_) => false, + }; + + let with_offset_alias = if with_offset { + match self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) { + Ok(Some(alias)) => Some(alias), + Ok(None) => None, + Err(e) => return Err(e), + } + } else { + None + }; + + Ok(TableFactor::UNNEST { + alias, + array_exprs, + with_offset, + with_offset_alias, + with_ordinality, + }) + } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { + let json_expr = self.parse_expr()?; + self.expect_token(&Token::Comma)?; + let json_path = self.parse_value()?; + self.expect_keyword(Keyword::COLUMNS)?; + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?; + self.expect_token(&Token::RParen)?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::JsonTable { + json_expr, + json_path, + columns, + alias, + }) + } else if self.parse_keyword_with_tokens(Keyword::OPENJSON, &[Token::LParen]) { + self.prev_token(); + self.parse_open_json_table_factor() + } else { + let name = self.parse_object_name(true)?; + + let json_path = match self.peek_token().token { + Token::LBracket if self.dialect.supports_partiql() => Some(self.parse_json_path()?), + _ => None, + }; + + let partitions: Vec = if dialect_of!(self is MySqlDialect | GenericDialect) + && self.parse_keyword(Keyword::PARTITION) + { + self.parse_parenthesized_identifiers()? + } else { + vec![] + }; + + // Parse potential version qualifier + let version = self.parse_table_version()?; + + // Postgres, MSSQL, ClickHouse: table-valued functions: + let args = if self.consume_token(&Token::LParen) { + Some(self.parse_table_function_args()?) 
+ } else { + None + }; + + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + + // MSSQL-specific table hints: + let mut with_hints = vec![]; + if self.parse_keyword(Keyword::WITH) { + if self.consume_token(&Token::LParen) { + with_hints = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + } else { + // rewind, as WITH may belong to the next statement's CTE + self.prev_token(); + } + }; + + let mut table = TableFactor::Table { + name, + alias, + args, + with_hints, + version, + partitions, + with_ordinality, + json_path, + }; + + while let Some(kw) = self.parse_one_of_keywords(&[Keyword::PIVOT, Keyword::UNPIVOT]) { + table = match kw { + Keyword::PIVOT => self.parse_pivot_table_factor(table)?, + Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, + _ => unreachable!(), + } + } + + if self.dialect.supports_match_recognize() + && self.parse_keyword(Keyword::MATCH_RECOGNIZE) + { + table = self.parse_match_recognize(table)?; + } + + Ok(table) + } + } + + pub fn parse_unpivot_table_factor( + &mut self, + table: TableFactor, + ) -> Result { + self.expect_token(&Token::LParen)?; + let value = self.parse_identifier(false)?; + self.expect_keyword(Keyword::FOR)?; + let name = self.parse_identifier(false)?; + self.expect_keyword(Keyword::IN)?; + let columns = self.parse_parenthesized_column_list(Mandatory, false)?; + self.expect_token(&Token::RParen)?; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::Unpivot { + table: Box::new(table), + value, + name, + columns, + alias, + }) + } + + /// Parses `OPENJSON( jsonExpression [ , path ] ) [ ]` clause, + /// assuming the `OPENJSON` keyword was already consumed. + fn parse_open_json_table_factor(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let json_expr = self.parse_expr()?; + let json_path = if self.consume_token(&Token::Comma) { + Some(self.parse_value()?) + } else { + None + }; + self.expect_token(&Token::RParen)?; + let columns = if self.parse_keyword(Keyword::WITH) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(Parser::parse_openjson_table_column_def)?; + self.expect_token(&Token::RParen)?; + columns + } else { + Vec::new() + }; + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + Ok(TableFactor::OpenJsonTable { + json_expr, + json_path, + columns, + alias, + }) + } + + fn parse_match_recognize(&mut self, table: TableFactor) -> Result { + self.expect_token(&Token::LParen)?; + + let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)? + } else { + vec![] + }; + + let measures = if self.parse_keyword(Keyword::MEASURES) { + self.parse_comma_separated(|p| { + let expr = p.parse_expr()?; + let _ = p.parse_keyword(Keyword::AS); + let alias = p.parse_identifier(false)?; + Ok(Measure { expr, alias }) + })? 
+ } else { + vec![] + }; + + let rows_per_match = + if self.parse_keywords(&[Keyword::ONE, Keyword::ROW, Keyword::PER, Keyword::MATCH]) { + Some(RowsPerMatch::OneRow) + } else if self.parse_keywords(&[ + Keyword::ALL, + Keyword::ROWS, + Keyword::PER, + Keyword::MATCH, + ]) { + Some(RowsPerMatch::AllRows( + if self.parse_keywords(&[Keyword::SHOW, Keyword::EMPTY, Keyword::MATCHES]) { + Some(EmptyMatchesMode::Show) + } else if self.parse_keywords(&[ + Keyword::OMIT, + Keyword::EMPTY, + Keyword::MATCHES, + ]) { + Some(EmptyMatchesMode::Omit) + } else if self.parse_keywords(&[ + Keyword::WITH, + Keyword::UNMATCHED, + Keyword::ROWS, + ]) { + Some(EmptyMatchesMode::WithUnmatched) + } else { + None + }, + )) + } else { + None + }; + + let after_match_skip = + if self.parse_keywords(&[Keyword::AFTER, Keyword::MATCH, Keyword::SKIP]) { + if self.parse_keywords(&[Keyword::PAST, Keyword::LAST, Keyword::ROW]) { + Some(AfterMatchSkip::PastLastRow) + } else if self.parse_keywords(&[Keyword::TO, Keyword::NEXT, Keyword::ROW]) { + Some(AfterMatchSkip::ToNextRow) + } else if self.parse_keywords(&[Keyword::TO, Keyword::FIRST]) { + Some(AfterMatchSkip::ToFirst(self.parse_identifier(false)?)) + } else if self.parse_keywords(&[Keyword::TO, Keyword::LAST]) { + Some(AfterMatchSkip::ToLast(self.parse_identifier(false)?)) + } else { + let found = self.next_token(); + return self.expected("after match skip option", found); + } + } else { + None + }; + + self.expect_keyword(Keyword::PATTERN)?; + let pattern = self.parse_parenthesized(Self::parse_pattern)?; + + self.expect_keyword(Keyword::DEFINE)?; + + let symbols = self.parse_comma_separated(|p| { + let symbol = p.parse_identifier(false)?; + p.expect_keyword(Keyword::AS)?; + let definition = p.parse_expr()?; + Ok(SymbolDefinition { symbol, definition }) + })?; + + self.expect_token(&Token::RParen)?; + + let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; + + Ok(TableFactor::MatchRecognize { + table: Box::new(table), + partition_by, + order_by, + measures, + rows_per_match, + after_match_skip, + pattern, + symbols, + alias, + }) + } + + fn parse_base_pattern(&mut self) -> Result { + match self.next_token().token { + Token::Caret => Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::Start)), + Token::Placeholder(s) if s == "$" => { + Ok(MatchRecognizePattern::Symbol(MatchRecognizeSymbol::End)) + } + Token::LBrace => { + self.expect_token(&Token::Minus)?; + let symbol = self + .parse_identifier(false) + .map(MatchRecognizeSymbol::Named)?; + self.expect_token(&Token::Minus)?; + self.expect_token(&Token::RBrace)?; + Ok(MatchRecognizePattern::Exclude(symbol)) + } + Token::Word(Word { + value, + quote_style: None, + .. 
+ }) if value == "PERMUTE" => { + self.expect_token(&Token::LParen)?; + let symbols = self.parse_comma_separated(|p| { + p.parse_identifier(false).map(MatchRecognizeSymbol::Named) + })?; + self.expect_token(&Token::RParen)?; + Ok(MatchRecognizePattern::Permute(symbols)) + } + Token::LParen => { + let pattern = self.parse_pattern()?; + self.expect_token(&Token::RParen)?; + Ok(MatchRecognizePattern::Group(Box::new(pattern))) + } + _ => { + self.prev_token(); + self.parse_identifier(false) + .map(MatchRecognizeSymbol::Named) + .map(MatchRecognizePattern::Symbol) + } + } + } + + fn parse_json_table_column_error_handling( + &mut self, + ) -> Result, ParserError> { + let res = if self.parse_keyword(Keyword::NULL) { + JsonTableColumnErrorHandling::Null + } else if self.parse_keyword(Keyword::ERROR) { + JsonTableColumnErrorHandling::Error + } else if self.parse_keyword(Keyword::DEFAULT) { + JsonTableColumnErrorHandling::Default(self.parse_value()?) + } else { + return Ok(None); + }; + self.expect_keyword(Keyword::ON)?; + Ok(Some(res)) + } + + fn parse_repetition_pattern(&mut self) -> Result { + let mut pattern = self.parse_base_pattern()?; + loop { + let token = self.next_token(); + let quantifier = match token.token { + Token::Mul => RepetitionQuantifier::ZeroOrMore, + Token::Plus => RepetitionQuantifier::OneOrMore, + Token::Placeholder(s) if s == "?" => RepetitionQuantifier::AtMostOne, + Token::LBrace => { + // quantifier is a range like {n} or {n,} or {,m} or {n,m} + let token = self.next_token(); + match token.token { + Token::Comma => { + let next_token = self.next_token(); + let Token::Number(n, _) = next_token.token else { + return self.expected("literal number", next_token); + }; + self.expect_token(&Token::RBrace)?; + RepetitionQuantifier::AtMost(Self::parse(n, token.span.start)?) + } + Token::Number(n, _) if self.consume_token(&Token::Comma) => { + let next_token = self.next_token(); + match next_token.token { + Token::Number(m, _) => { + self.expect_token(&Token::RBrace)?; + RepetitionQuantifier::Range( + Self::parse(n, token.span.start)?, + Self::parse(m, token.span.start)?, + ) + } + Token::RBrace => { + RepetitionQuantifier::AtLeast(Self::parse(n, token.span.start)?) + } + _ => { + return self.expected("} or upper bound", next_token); + } + } + } + Token::Number(n, _) => { + self.expect_token(&Token::RBrace)?; + RepetitionQuantifier::Exactly(Self::parse(n, token.span.start)?) + } + _ => return self.expected("quantifier range", token), + } + } + _ => { + self.prev_token(); + break; + } + }; + pattern = MatchRecognizePattern::Repetition(Box::new(pattern), quantifier); + } + Ok(pattern) + } + + fn parse_concat_pattern(&mut self) -> Result { + let mut patterns = vec![self.parse_repetition_pattern()?]; + while !matches!(self.peek_token().token, Token::RParen | Token::Pipe) { + patterns.push(self.parse_repetition_pattern()?); + } + match <[MatchRecognizePattern; 1]>::try_from(patterns) { + Ok([pattern]) => Ok(pattern), + Err(patterns) => Ok(MatchRecognizePattern::Concat(patterns)), + } + } + + fn parse_pattern(&mut self) -> Result { + let pattern = self.parse_concat_pattern()?; + if self.consume_token(&Token::Pipe) { + match self.parse_pattern()? 
{ + // flatten nested alternations + MatchRecognizePattern::Alternation(mut patterns) => { + patterns.insert(0, pattern); + Ok(MatchRecognizePattern::Alternation(patterns)) + } + next => Ok(MatchRecognizePattern::Alternation(vec![pattern, next])), + } + } else { + Ok(pattern) + } + } + + pub fn parse_optional_args(&mut self) -> Result, ParserError> { + if self.consume_token(&Token::RParen) { + Ok(vec![]) + } else { + let args = self.parse_comma_separated(Parser::parse_function_args)?; + self.expect_token(&Token::RParen)?; + Ok(args) + } + } + + fn parse_aliased_function_call(&mut self) -> Result { + let function_name = match self.next_token().token { + Token::Word(w) => Ok(w.value), + _ => self.expected("a function identifier", self.peek_token()), + }?; + let expr = self.parse_function(ObjectName(vec![Ident::new(function_name)]))?; + let alias = if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier(false)?) + } else { + None + }; + + Ok(ExprWithAlias { expr, alias }) + } + + fn parse_function_named_arg_operator(&mut self) -> Result { + if self.parse_keyword(Keyword::VALUE) { + return Ok(FunctionArgOperator::Value); + } + let tok = self.next_token(); + match tok.token { + Token::RArrow if self.dialect.supports_named_fn_args_with_rarrow_operator() => { + Ok(FunctionArgOperator::RightArrow) + } + Token::Eq if self.dialect.supports_named_fn_args_with_eq_operator() => { + Ok(FunctionArgOperator::Equals) + } + Token::Assignment + if self + .dialect + .supports_named_fn_args_with_assignment_operator() => + { + Ok(FunctionArgOperator::Assignment) + } + Token::Colon if self.dialect.supports_named_fn_args_with_colon_operator() => { + Ok(FunctionArgOperator::Colon) + } + _ => { + self.prev_token(); + self.expected("argument operator", tok) + } + } + } + + fn parse_table_function_args(&mut self) -> Result { + if self.consume_token(&Token::RParen) { + return Ok(TableFunctionArgs { + args: vec![], + settings: None, + }); + } + let mut args = vec![]; + let settings = loop { + if let Some(settings) = self.parse_settings()? { + break Some(settings); + } + args.push(self.parse_function_args()?); + if self.is_parse_comma_separated_end() { + break None; + } + }; + self.expect_token(&Token::RParen)?; + Ok(TableFunctionArgs { args, settings }) + } + + /// Parse a parenthesized comma-separated list of table alias column definitions. + fn parse_table_alias_column_defs(&mut self) -> Result, ParserError> { + if self.consume_token(&Token::LParen) { + let cols = self.parse_comma_separated(|p| { + let name = p.parse_identifier(false)?; + let data_type = p.maybe_parse(|p| p.parse_data_type())?; + Ok(TableAliasColumnDef { name, data_type }) + })?; + self.expect_token(&Token::RParen)?; + Ok(cols) + } else { + Ok(vec![]) + } + } + + /// Invoke `f` after first setting the parser's `ParserState` to `state`. + /// + /// Upon return, restores the parser's state to what it started at. 
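+    ///
+    /// A minimal usage sketch (hypothetical call site; `ParserState::ConnectBy`
+    /// is just one example of a state a caller might set):
+    /// ```ignore
+    /// let expr = self.with_state(ParserState::ConnectBy, |p| p.parse_expr())?;
+    /// ```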
+ fn with_state(&mut self, state: ParserState, mut f: F) -> Result + where + F: FnMut(&mut Parser) -> Result, + { + let current_state = self.state; + self.state = state; + let res = f(self); + self.state = current_state; + res + } +} diff --git a/src/parser/set.rs b/src/parser/set.rs new file mode 100644 index 000000000..9194c7a57 --- /dev/null +++ b/src/parser/set.rs @@ -0,0 +1,126 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_set(&mut self) -> Result { + let modifier = + self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); + if let Some(Keyword::HIVEVAR) = modifier { + self.expect_token(&Token::Colon)?; + } else if let Some(set_role_stmt) = + self.maybe_parse(|parser| parser.parse_set_role(modifier))? + { + return Ok(set_role_stmt); + } + + let variables = if self.parse_keywords(&[Keyword::TIME, Keyword::ZONE]) { + OneOrManyWithParens::One(ObjectName(vec!["TIMEZONE".into()])) + } else if self.dialect.supports_parenthesized_set_variables() + && self.consume_token(&Token::LParen) + { + let variables = OneOrManyWithParens::Many( + self.parse_comma_separated(|parser: &mut Parser<'a>| { + parser.parse_identifier(false) + })? + .into_iter() + .map(|ident| ObjectName(vec![ident])) + .collect(), + ); + self.expect_token(&Token::RParen)?; + variables + } else { + OneOrManyWithParens::One(self.parse_object_name(false)?) + }; + + if matches!(&variables, OneOrManyWithParens::One(variable) if variable.to_string().eq_ignore_ascii_case("NAMES") + && dialect_of!(self is MySqlDialect | GenericDialect)) + { + if self.parse_keyword(Keyword::DEFAULT) { + return Ok(Statement::SetNamesDefault {}); + } + + let charset_name = self.parse_literal_string()?; + let collation_name = if self.parse_one_of_keywords(&[Keyword::COLLATE]).is_some() { + Some(self.parse_literal_string()?) + } else { + None + }; + + return Ok(Statement::SetNames { + charset_name, + collation_name, + }); + } + + let parenthesized_assignment = matches!(&variables, OneOrManyWithParens::Many(_)); + + if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { + if parenthesized_assignment { + self.expect_token(&Token::LParen)?; + } + + let mut values = vec![]; + loop { + let value = if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Ok(expr) = self.parse_expr() { + expr + } else { + self.expected("variable value", self.peek_token())? + }; + + values.push(value); + if self.consume_token(&Token::Comma) { + continue; + } + + if parenthesized_assignment { + self.expect_token(&Token::RParen)?; + } + return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: Some(Keyword::HIVEVAR) == modifier, + variables, + value: values, + }); + } + } + + let OneOrManyWithParens::One(variable) = variables else { + return self.expected("set variable", self.peek_token()); + }; + + if variable.to_string().eq_ignore_ascii_case("TIMEZONE") { + // for some db (e.g. 
postgresql), SET TIME ZONE is an alias for SET TIMEZONE [TO|=] + match self.parse_expr() { + Ok(expr) => Ok(Statement::SetTimeZone { + local: modifier == Some(Keyword::LOCAL), + value: expr, + }), + _ => self.expected("timezone value", self.peek_token())?, + } + } else if variable.to_string() == "CHARACTERISTICS" { + self.expect_keywords(&[Keyword::AS, Keyword::TRANSACTION])?; + Ok(Statement::SetTransaction { + modes: self.parse_transaction_modes()?, + snapshot: None, + session: true, + }) + } else if variable.to_string() == "TRANSACTION" && modifier.is_none() { + if self.parse_keyword(Keyword::SNAPSHOT) { + let snapshot_id = self.parse_value()?; + return Ok(Statement::SetTransaction { + modes: vec![], + snapshot: Some(snapshot_id), + session: false, + }); + } + Ok(Statement::SetTransaction { + modes: self.parse_transaction_modes()?, + snapshot: None, + session: false, + }) + } else { + self.expected("equals sign or TO", self.peek_token()) + } + } +} diff --git a/src/parser/show.rs b/src/parser/show.rs new file mode 100644 index 000000000..af7e20432 --- /dev/null +++ b/src/parser/show.rs @@ -0,0 +1,299 @@ +use super::*; + +impl<'a> Parser<'a> { + pub fn parse_show(&mut self) -> Result { + let terse = self.parse_keyword(Keyword::TERSE); + let extended = self.parse_keyword(Keyword::EXTENDED); + let full = self.parse_keyword(Keyword::FULL); + let session = self.parse_keyword(Keyword::SESSION); + let global = self.parse_keyword(Keyword::GLOBAL); + let external = self.parse_keyword(Keyword::EXTERNAL); + if self + .parse_one_of_keywords(&[Keyword::COLUMNS, Keyword::FIELDS]) + .is_some() + { + Ok(self.parse_show_columns(extended, full)?) + } else if self.parse_keyword(Keyword::TABLES) { + Ok(self.parse_show_tables(terse, extended, full, external)?) + } else if self.parse_keywords(&[Keyword::MATERIALIZED, Keyword::VIEWS]) { + Ok(self.parse_show_views(terse, true)?) + } else if self.parse_keyword(Keyword::VIEWS) { + Ok(self.parse_show_views(terse, false)?) + } else if self.parse_keyword(Keyword::FUNCTIONS) { + Ok(self.parse_show_functions()?) + } else if extended || full { + Err(ParserError::ParserError( + "EXTENDED/FULL are not supported with this type of SHOW query".to_string(), + )) + } else if self.parse_one_of_keywords(&[Keyword::CREATE]).is_some() { + Ok(self.parse_show_create()?) + } else if self.parse_keyword(Keyword::COLLATION) { + Ok(self.parse_show_collation()?) 
+ } else if self.parse_keyword(Keyword::VARIABLES) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + Ok(Statement::ShowVariables { + filter: self.parse_show_statement_filter()?, + session, + global, + }) + } else if self.parse_keyword(Keyword::STATUS) + && dialect_of!(self is MySqlDialect | GenericDialect) + { + Ok(Statement::ShowStatus { + filter: self.parse_show_statement_filter()?, + session, + global, + }) + } else if self.parse_keyword(Keyword::DATABASES) { + self.parse_show_databases(terse) + } else if self.parse_keyword(Keyword::SCHEMAS) { + self.parse_show_schemas(terse) + } else { + Ok(Statement::ShowVariable { + variable: self.parse_identifiers()?, + }) + } + } + + fn parse_show_databases(&mut self, terse: bool) -> Result { + let history = self.parse_keyword(Keyword::HISTORY); + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowDatabases { + terse, + history, + show_options, + }) + } + + fn parse_show_schemas(&mut self, terse: bool) -> Result { + let history = self.parse_keyword(Keyword::HISTORY); + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowSchemas { + terse, + history, + show_options, + }) + } + + pub fn parse_show_create(&mut self) -> Result { + let obj_type = match self.expect_one_of_keywords(&[ + Keyword::TABLE, + Keyword::TRIGGER, + Keyword::FUNCTION, + Keyword::PROCEDURE, + Keyword::EVENT, + Keyword::VIEW, + ])? { + Keyword::TABLE => Ok(ShowCreateObject::Table), + Keyword::TRIGGER => Ok(ShowCreateObject::Trigger), + Keyword::FUNCTION => Ok(ShowCreateObject::Function), + Keyword::PROCEDURE => Ok(ShowCreateObject::Procedure), + Keyword::EVENT => Ok(ShowCreateObject::Event), + Keyword::VIEW => Ok(ShowCreateObject::View), + keyword => Err(ParserError::ParserError(format!( + "Unable to map keyword to ShowCreateObject: {keyword:?}" + ))), + }?; + + let obj_name = self.parse_object_name(false)?; + + Ok(Statement::ShowCreate { obj_type, obj_name }) + } + + pub fn parse_show_columns( + &mut self, + extended: bool, + full: bool, + ) -> Result { + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowColumns { + extended, + full, + show_options, + }) + } + + fn parse_show_tables( + &mut self, + terse: bool, + extended: bool, + full: bool, + external: bool, + ) -> Result { + let history = !external && self.parse_keyword(Keyword::HISTORY); + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowTables { + terse, + history, + extended, + full, + external, + show_options, + }) + } + + fn parse_show_views( + &mut self, + terse: bool, + materialized: bool, + ) -> Result { + let show_options = self.parse_show_stmt_options()?; + Ok(Statement::ShowViews { + materialized, + terse, + show_options, + }) + } + + pub fn parse_show_functions(&mut self) -> Result { + let filter = self.parse_show_statement_filter()?; + Ok(Statement::ShowFunctions { filter }) + } + + pub fn parse_show_collation(&mut self) -> Result { + let filter = self.parse_show_statement_filter()?; + Ok(Statement::ShowCollation { filter }) + } + + pub fn parse_show_statement_filter( + &mut self, + ) -> Result, ParserError> { + if self.parse_keyword(Keyword::LIKE) { + Ok(Some(ShowStatementFilter::Like( + self.parse_literal_string()?, + ))) + } else if self.parse_keyword(Keyword::ILIKE) { + Ok(Some(ShowStatementFilter::ILike( + self.parse_literal_string()?, + ))) + } else if self.parse_keyword(Keyword::WHERE) { + Ok(Some(ShowStatementFilter::Where(self.parse_expr()?))) + } else { + self.maybe_parse(|parser| -> Result { + 
parser.parse_literal_string()
+            })?
+            .map_or(Ok(None), |filter| {
+                Ok(Some(ShowStatementFilter::NoKeyword(filter)))
+            })
+        }
+    }
+
+    fn parse_show_stmt_options(&mut self) -> Result<ShowStatementOptions, ParserError> {
+        let show_in;
+        let mut filter_position = None;
+        if self.dialect.supports_show_like_before_in() {
+            if let Some(filter) = self.parse_show_statement_filter()? {
+                filter_position = Some(ShowStatementFilterPosition::Infix(filter));
+            }
+            show_in = self.maybe_parse_show_stmt_in()?;
+        } else {
+            show_in = self.maybe_parse_show_stmt_in()?;
+            if let Some(filter) = self.parse_show_statement_filter()? {
+                filter_position = Some(ShowStatementFilterPosition::Suffix(filter));
+            }
+        }
+        let starts_with = self.maybe_parse_show_stmt_starts_with()?;
+        let limit = self.maybe_parse_show_stmt_limit()?;
+        let from = self.maybe_parse_show_stmt_from()?;
+        Ok(ShowStatementOptions {
+            filter_position,
+            show_in,
+            starts_with,
+            limit,
+            limit_from: from,
+        })
+    }
+
+    fn maybe_parse_show_stmt_in(&mut self) -> Result<Option<ShowStatementIn>, ParserError> {
+        let clause = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) {
+            Some(Keyword::FROM) => ShowStatementInClause::FROM,
+            Some(Keyword::IN) => ShowStatementInClause::IN,
+            None => return Ok(None),
+            _ => return self.expected("FROM or IN", self.peek_token()),
+        };
+
+        let (parent_type, parent_name) = match self.parse_one_of_keywords(&[
+            Keyword::ACCOUNT,
+            Keyword::DATABASE,
+            Keyword::SCHEMA,
+            Keyword::TABLE,
+            Keyword::VIEW,
+        ]) {
+            // If we see these next keywords it means we don't have a parent name
+            Some(Keyword::DATABASE)
+                if self.peek_keywords(&[Keyword::STARTS, Keyword::WITH])
+                    | self.peek_keyword(Keyword::LIMIT) =>
+            {
+                (Some(ShowStatementInParentType::Database), None)
+            }
+            Some(Keyword::SCHEMA)
+                if self.peek_keywords(&[Keyword::STARTS, Keyword::WITH])
+                    | self.peek_keyword(Keyword::LIMIT) =>
+            {
+                (Some(ShowStatementInParentType::Schema), None)
+            }
+            Some(parent_kw) => {
+                // The parent name here is still optional, for example:
+                // SHOW TABLES IN ACCOUNT, so parsing the object name
+                // may fail because the statement ends.
+                let parent_name = self.maybe_parse(|p| p.parse_object_name(false))?;
+                match parent_kw {
+                    Keyword::ACCOUNT => (Some(ShowStatementInParentType::Account), parent_name),
+                    Keyword::DATABASE => (Some(ShowStatementInParentType::Database), parent_name),
+                    Keyword::SCHEMA => (Some(ShowStatementInParentType::Schema), parent_name),
+                    Keyword::TABLE => (Some(ShowStatementInParentType::Table), parent_name),
+                    Keyword::VIEW => (Some(ShowStatementInParentType::View), parent_name),
+                    _ => {
+                        return self.expected(
+                            "one of ACCOUNT, DATABASE, SCHEMA, TABLE or VIEW",
+                            self.peek_token(),
+                        )
+                    }
+                }
+            }
+            None => {
+                // Parsing MySQL style FROM tbl_name FROM db_name
+                // which is equivalent to FROM db_name.tbl_name
+                let mut parent_name = self.parse_object_name(false)?;
+                if self
+                    .parse_one_of_keywords(&[Keyword::FROM, Keyword::IN])
+                    .is_some()
+                {
+                    parent_name.0.insert(0, self.parse_identifier(false)?);
+                }
+                (None, Some(parent_name))
+            }
+        };
+
+        Ok(Some(ShowStatementIn {
+            clause,
+            parent_type,
+            parent_name,
+        }))
+    }
+
+    fn maybe_parse_show_stmt_starts_with(&mut self) -> Result<Option<Value>, ParserError> {
+        if self.parse_keywords(&[Keyword::STARTS, Keyword::WITH]) {
+            Ok(Some(self.parse_value()?))
+        } else {
+            Ok(None)
+        }
+    }
+
+    fn maybe_parse_show_stmt_limit(&mut self) -> Result<Option<Expr>, ParserError> {
+        if self.parse_keyword(Keyword::LIMIT) {
+            Ok(self.parse_limit()?)
+ } else { + Ok(None) + } + } + + fn maybe_parse_show_stmt_from(&mut self) -> Result, ParserError> { + if self.parse_keyword(Keyword::FROM) { + Ok(Some(self.parse_value()?)) + } else { + Ok(None) + } + } +} diff --git a/src/parser/start.rs b/src/parser/start.rs new file mode 100644 index 000000000..fbceafb22 --- /dev/null +++ b/src/parser/start.rs @@ -0,0 +1,74 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_start_transaction(&mut self) -> Result { + self.expect_keyword(Keyword::TRANSACTION)?; + Ok(Statement::StartTransaction { + modes: self.parse_transaction_modes()?, + begin: false, + transaction: Some(BeginTransactionKind::Transaction), + modifier: None, + }) + } + + pub fn parse_begin(&mut self) -> Result { + let modifier = if !self.dialect.supports_start_transaction_modifier() { + None + } else if self.parse_keyword(Keyword::DEFERRED) { + Some(TransactionModifier::Deferred) + } else if self.parse_keyword(Keyword::IMMEDIATE) { + Some(TransactionModifier::Immediate) + } else if self.parse_keyword(Keyword::EXCLUSIVE) { + Some(TransactionModifier::Exclusive) + } else { + None + }; + let transaction = match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) { + Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction), + Some(Keyword::WORK) => Some(BeginTransactionKind::Work), + _ => None, + }; + Ok(Statement::StartTransaction { + modes: self.parse_transaction_modes()?, + begin: true, + transaction, + modifier, + }) + } + + pub fn parse_transaction_modes(&mut self) -> Result, ParserError> { + let mut modes = vec![]; + let mut required = false; + loop { + let mode = if self.parse_keywords(&[Keyword::ISOLATION, Keyword::LEVEL]) { + let iso_level = if self.parse_keywords(&[Keyword::READ, Keyword::UNCOMMITTED]) { + TransactionIsolationLevel::ReadUncommitted + } else if self.parse_keywords(&[Keyword::READ, Keyword::COMMITTED]) { + TransactionIsolationLevel::ReadCommitted + } else if self.parse_keywords(&[Keyword::REPEATABLE, Keyword::READ]) { + TransactionIsolationLevel::RepeatableRead + } else if self.parse_keyword(Keyword::SERIALIZABLE) { + TransactionIsolationLevel::Serializable + } else { + self.expected("isolation level", self.peek_token())? + }; + TransactionMode::IsolationLevel(iso_level) + } else if self.parse_keywords(&[Keyword::READ, Keyword::ONLY]) { + TransactionMode::AccessMode(TransactionAccessMode::ReadOnly) + } else if self.parse_keywords(&[Keyword::READ, Keyword::WRITE]) { + TransactionMode::AccessMode(TransactionAccessMode::ReadWrite) + } else if required { + self.expected("transaction mode", self.peek_token())? + } else { + break; + }; + modes.push(mode); + // ANSI requires a comma after each transaction mode, but + // PostgreSQL, for historical reasons, does not. We follow + // PostgreSQL in making the comma optional, since that is strictly + // more general. 
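+            // For illustration, both of the following are therefore accepted
+            // by this loop:
+            //   START TRANSACTION READ WRITE, ISOLATION LEVEL SERIALIZABLE
+            //   START TRANSACTION READ WRITE ISOLATION LEVEL SERIALIZABLE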
+ required = self.consume_token(&Token::Comma); + } + Ok(modes) + } +} diff --git a/src/parser/tests.rs b/src/parser/tests.rs new file mode 100644 index 000000000..d7d06f139 --- /dev/null +++ b/src/parser/tests.rs @@ -0,0 +1,695 @@ +use crate::test_utils::{all_dialects, TestedDialects}; + +use super::*; + +#[test] +fn test_prev_index() { + let sql = "SELECT version"; + all_dialects().run_parser_method(sql, |parser| { + assert_eq!(parser.peek_token(), Token::make_keyword("SELECT")); + assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); + parser.prev_token(); + assert_eq!(parser.next_token(), Token::make_keyword("SELECT")); + assert_eq!(parser.next_token(), Token::make_word("version", None)); + parser.prev_token(); + assert_eq!(parser.peek_token(), Token::make_word("version", None)); + assert_eq!(parser.next_token(), Token::make_word("version", None)); + assert_eq!(parser.peek_token(), Token::EOF); + parser.prev_token(); + assert_eq!(parser.next_token(), Token::make_word("version", None)); + assert_eq!(parser.next_token(), Token::EOF); + assert_eq!(parser.next_token(), Token::EOF); + parser.prev_token(); + }); +} + +#[test] +fn test_peek_tokens() { + all_dialects().run_parser_method("SELECT foo AS bar FROM baz", |parser| { + assert!(matches!( + parser.peek_tokens(), + [Token::Word(Word { + keyword: Keyword::SELECT, + .. + })] + )); + + assert!(matches!( + parser.peek_tokens(), + [ + Token::Word(Word { + keyword: Keyword::SELECT, + .. + }), + Token::Word(_), + Token::Word(Word { + keyword: Keyword::AS, + .. + }), + ] + )); + + for _ in 0..4 { + parser.next_token(); + } + + assert!(matches!( + parser.peek_tokens(), + [ + Token::Word(Word { + keyword: Keyword::FROM, + .. + }), + Token::Word(_), + Token::EOF, + Token::EOF, + ] + )) + }) +} + +#[cfg(test)] +mod test_parse_data_type { + use crate::ast::{ + CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, ObjectName, TimezoneInfo, + }; + use crate::dialect::{AnsiDialect, GenericDialect}; + use crate::test_utils::TestedDialects; + + macro_rules! test_parse_data_type { + ($dialect:expr, $input:expr, $expected_type:expr $(,)?) 
=> {{ + $dialect.run_parser_method(&*$input, |parser| { + let data_type = parser.parse_data_type().unwrap(); + assert_eq!($expected_type, data_type); + assert_eq!($input.to_string(), data_type.to_string()); + }); + }}; + } + + #[test] + fn test_ansii_character_string_types() { + // Character string types: + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None)); + + test_parse_data_type!( + dialect, + "CHARACTER(20)", + DataType::Character(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER(20 CHARACTERS)", + DataType::Character(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER(20 OCTETS)", + DataType::Character(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); + + test_parse_data_type!(dialect, "CHAR", DataType::Char(None)); + + test_parse_data_type!( + dialect, + "CHAR(20)", + DataType::Char(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHAR(20 CHARACTERS)", + DataType::Char(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHAR(20 OCTETS)", + DataType::Char(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER VARYING(20)", + DataType::CharacterVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER VARYING(20 CHARACTERS)", + DataType::CharacterVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER VARYING(20 OCTETS)", + DataType::CharacterVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); + + test_parse_data_type!( + dialect, + "CHAR VARYING(20)", + DataType::CharVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHAR VARYING(20 CHARACTERS)", + DataType::CharVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHAR VARYING(20 OCTETS)", + DataType::CharVarying(Some(CharacterLength::IntegerLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); + + test_parse_data_type!( + dialect, + "VARCHAR(20)", + DataType::Varchar(Some(CharacterLength::IntegerLength { + length: 20, + unit: None + })) + ); + } + + #[test] + fn test_ansii_character_large_object_types() { + // Character large object types: + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!( + dialect, + "CHARACTER LARGE OBJECT", + DataType::CharacterLargeObject(None) + ); + test_parse_data_type!( + dialect, + "CHARACTER LARGE OBJECT(20)", + DataType::CharacterLargeObject(Some(20)) + ); + + test_parse_data_type!( + dialect, + "CHAR LARGE OBJECT", + DataType::CharLargeObject(None) + ); + test_parse_data_type!( + dialect, + "CHAR LARGE OBJECT(20)", + DataType::CharLargeObject(Some(20)) + ); + + test_parse_data_type!(dialect, "CLOB", 
DataType::Clob(None)); + test_parse_data_type!(dialect, "CLOB(20)", DataType::Clob(Some(20))); + } + + #[test] + fn test_parse_custom_types() { + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!( + dialect, + "GEOMETRY", + DataType::Custom(ObjectName(vec!["GEOMETRY".into()]), vec![]) + ); + + test_parse_data_type!( + dialect, + "GEOMETRY(POINT)", + DataType::Custom( + ObjectName(vec!["GEOMETRY".into()]), + vec!["POINT".to_string()] + ) + ); + + test_parse_data_type!( + dialect, + "GEOMETRY(POINT, 4326)", + DataType::Custom( + ObjectName(vec!["GEOMETRY".into()]), + vec!["POINT".to_string(), "4326".to_string()] + ) + ); + } + + #[test] + fn test_ansii_exact_numeric_types() { + // Exact numeric types: + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!(dialect, "NUMERIC", DataType::Numeric(ExactNumberInfo::None)); + + test_parse_data_type!( + dialect, + "NUMERIC(2)", + DataType::Numeric(ExactNumberInfo::Precision(2)) + ); + + test_parse_data_type!( + dialect, + "NUMERIC(2,10)", + DataType::Numeric(ExactNumberInfo::PrecisionAndScale(2, 10)) + ); + + test_parse_data_type!(dialect, "DECIMAL", DataType::Decimal(ExactNumberInfo::None)); + + test_parse_data_type!( + dialect, + "DECIMAL(2)", + DataType::Decimal(ExactNumberInfo::Precision(2)) + ); + + test_parse_data_type!( + dialect, + "DECIMAL(2,10)", + DataType::Decimal(ExactNumberInfo::PrecisionAndScale(2, 10)) + ); + + test_parse_data_type!(dialect, "DEC", DataType::Dec(ExactNumberInfo::None)); + + test_parse_data_type!( + dialect, + "DEC(2)", + DataType::Dec(ExactNumberInfo::Precision(2)) + ); + + test_parse_data_type!( + dialect, + "DEC(2,10)", + DataType::Dec(ExactNumberInfo::PrecisionAndScale(2, 10)) + ); + } + + #[test] + fn test_ansii_date_type() { + // Datetime types: + let dialect = + TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(AnsiDialect {})]); + + test_parse_data_type!(dialect, "DATE", DataType::Date); + + test_parse_data_type!(dialect, "TIME", DataType::Time(None, TimezoneInfo::None)); + + test_parse_data_type!( + dialect, + "TIME(6)", + DataType::Time(Some(6), TimezoneInfo::None) + ); + + test_parse_data_type!( + dialect, + "TIME WITH TIME ZONE", + DataType::Time(None, TimezoneInfo::WithTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIME(6) WITH TIME ZONE", + DataType::Time(Some(6), TimezoneInfo::WithTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIME WITHOUT TIME ZONE", + DataType::Time(None, TimezoneInfo::WithoutTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIME(6) WITHOUT TIME ZONE", + DataType::Time(Some(6), TimezoneInfo::WithoutTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIMESTAMP", + DataType::Timestamp(None, TimezoneInfo::None) + ); + + test_parse_data_type!( + dialect, + "TIMESTAMP(22)", + DataType::Timestamp(Some(22), TimezoneInfo::None) + ); + + test_parse_data_type!( + dialect, + "TIMESTAMP(22) WITH TIME ZONE", + DataType::Timestamp(Some(22), TimezoneInfo::WithTimeZone) + ); + + test_parse_data_type!( + dialect, + "TIMESTAMP(33) WITHOUT TIME ZONE", + DataType::Timestamp(Some(33), TimezoneInfo::WithoutTimeZone) + ); + } +} + +#[test] +fn test_parse_schema_name() { + // The expected name should be identical as the input name, that's why I don't receive both + macro_rules! test_parse_schema_name { + ($input:expr, $expected_name:expr $(,)?) 
=> {{ + all_dialects().run_parser_method(&*$input, |parser| { + let schema_name = parser.parse_schema_name().unwrap(); + // Validate that the structure is the same as expected + assert_eq!(schema_name, $expected_name); + // Validate that the input and the expected structure serialization are the same + assert_eq!(schema_name.to_string(), $input.to_string()); + }); + }}; + } + + let dummy_name = ObjectName(vec![Ident::new("dummy_name")]); + let dummy_authorization = Ident::new("dummy_authorization"); + + test_parse_schema_name!( + format!("{dummy_name}"), + SchemaName::Simple(dummy_name.clone()) + ); + + test_parse_schema_name!( + format!("AUTHORIZATION {dummy_authorization}"), + SchemaName::UnnamedAuthorization(dummy_authorization.clone()), + ); + test_parse_schema_name!( + format!("{dummy_name} AUTHORIZATION {dummy_authorization}"), + SchemaName::NamedAuthorization(dummy_name.clone(), dummy_authorization.clone()), + ); +} + +#[test] +fn mysql_parse_index_table_constraint() { + macro_rules! test_parse_table_constraint { + ($dialect:expr, $input:expr, $expected:expr $(,)?) => {{ + $dialect.run_parser_method(&*$input, |parser| { + let constraint = parser.parse_optional_table_constraint().unwrap().unwrap(); + // Validate that the structure is the same as expected + assert_eq!(constraint, $expected); + // Validate that the input and the expected structure serialization are the same + assert_eq!(constraint.to_string(), $input.to_string()); + }); + }}; + } + + let dialect = TestedDialects::new(vec![Box::new(GenericDialect {}), Box::new(MySqlDialect {})]); + + test_parse_table_constraint!( + dialect, + "INDEX (c1)", + TableConstraint::Index { + display_as_key: false, + name: None, + index_type: None, + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "KEY (c1)", + TableConstraint::Index { + display_as_key: true, + name: None, + index_type: None, + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX 'index' (c1, c2)", + TableConstraint::Index { + display_as_key: false, + name: Some(Ident::with_quote('\'', "index")), + index_type: None, + columns: vec![Ident::new("c1"), Ident::new("c2")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX USING BTREE (c1)", + TableConstraint::Index { + display_as_key: false, + name: None, + index_type: Some(IndexType::BTree), + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX USING HASH (c1)", + TableConstraint::Index { + display_as_key: false, + name: None, + index_type: Some(IndexType::Hash), + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX idx_name USING BTREE (c1)", + TableConstraint::Index { + display_as_key: false, + name: Some(Ident::new("idx_name")), + index_type: Some(IndexType::BTree), + columns: vec![Ident::new("c1")], + } + ); + + test_parse_table_constraint!( + dialect, + "INDEX idx_name USING HASH (c1)", + TableConstraint::Index { + display_as_key: false, + name: Some(Ident::new("idx_name")), + index_type: Some(IndexType::Hash), + columns: vec![Ident::new("c1")], + } + ); +} + +#[test] +fn test_tokenizer_error_loc() { + let sql = "foo '"; + let ast = Parser::parse_sql(&GenericDialect, sql); + assert_eq!( + ast, + Err(ParserError::TokenizerError( + "Unterminated string literal at Line: 1, Column: 5".to_string() + )) + ); +} + +#[test] +fn test_parser_error_loc() { + let sql = "SELECT this is a syntax error"; + let ast = Parser::parse_sql(&GenericDialect, sql); + 
assert_eq!( + ast, + Err(ParserError::ParserError( + "Expected: [NOT] NULL or TRUE|FALSE or [NOT] DISTINCT FROM after IS, found: a at Line: 1, Column: 16" + .to_string() + )) + ); +} + +#[test] +fn test_nested_explain_error() { + let sql = "EXPLAIN EXPLAIN SELECT 1"; + let ast = Parser::parse_sql(&GenericDialect, sql); + assert_eq!( + ast, + Err(ParserError::ParserError( + "Explain must be root of the plan".to_string() + )) + ); +} + +#[test] +fn test_parse_multipart_identifier_positive() { + let dialect = TestedDialects::new(vec![Box::new(GenericDialect {})]); + + // parse multipart with quotes + let expected = vec![ + Ident { + value: "CATALOG".to_string(), + quote_style: None, + span: Span::empty(), + }, + Ident { + value: "F(o)o. \"bar".to_string(), + quote_style: Some('"'), + span: Span::empty(), + }, + Ident { + value: "table".to_string(), + quote_style: None, + span: Span::empty(), + }, + ]; + dialect.run_parser_method(r#"CATALOG."F(o)o. ""bar".table"#, |parser| { + let actual = parser.parse_multipart_identifier().unwrap(); + assert_eq!(expected, actual); + }); + + // allow whitespace between ident parts + let expected = vec![ + Ident { + value: "CATALOG".to_string(), + quote_style: None, + span: Span::empty(), + }, + Ident { + value: "table".to_string(), + quote_style: None, + span: Span::empty(), + }, + ]; + dialect.run_parser_method("CATALOG . table", |parser| { + let actual = parser.parse_multipart_identifier().unwrap(); + assert_eq!(expected, actual); + }); +} + +#[test] +fn test_parse_multipart_identifier_negative() { + macro_rules! test_parse_multipart_identifier_error { + ($input:expr, $expected_err:expr $(,)?) => {{ + all_dialects().run_parser_method(&*$input, |parser| { + let actual_err = parser.parse_multipart_identifier().unwrap_err(); + assert_eq!(actual_err.to_string(), $expected_err); + }); + }}; + } + + test_parse_multipart_identifier_error!( + "", + "sql parser error: Empty input when parsing identifier", + ); + + test_parse_multipart_identifier_error!( + "*schema.table", + "sql parser error: Unexpected token in identifier: *", + ); + + test_parse_multipart_identifier_error!( + "schema.table*", + "sql parser error: Unexpected token in identifier: *", + ); + + test_parse_multipart_identifier_error!( + "schema.table.", + "sql parser error: Trailing period in identifier", + ); + + test_parse_multipart_identifier_error!( + "schema.*", + "sql parser error: Unexpected token following period in identifier: *", + ); +} + +#[test] +fn test_mysql_partition_selection() { + let sql = "SELECT * FROM employees PARTITION (p0, p2)"; + let expected = vec!["p0", "p2"]; + + let ast: Vec = Parser::parse_sql(&MySqlDialect {}, sql).unwrap(); + assert_eq!(ast.len(), 1); + if let Statement::Query(v) = &ast[0] { + if let SetExpr::Select(select) = &*v.body { + assert_eq!(select.from.len(), 1); + let from: &TableWithJoins = &select.from[0]; + let table_factor = &from.relation; + if let TableFactor::Table { partitions, .. } = table_factor { + let actual: Vec<&str> = partitions + .iter() + .map(|ident| ident.value.as_str()) + .collect(); + assert_eq!(expected, actual); + } + } + } else { + panic!("fail to parse mysql partition selection"); + } +} + +#[test] +fn test_replace_into_placeholders() { + let sql = "REPLACE INTO t (a) VALUES (&a)"; + + assert!(Parser::parse_sql(&GenericDialect {}, sql).is_err()); +} + +#[test] +fn test_replace_into_set() { + // NOTE: This is actually valid MySQL syntax, REPLACE and INSERT, + // but the parser does not yet support it. 
+ // https://dev.mysql.com/doc/refman/8.3/en/insert.html + let sql = "REPLACE INTO t SET a='1'"; + + assert!(Parser::parse_sql(&MySqlDialect {}, sql).is_err()); +} + +#[test] +fn test_replace_into_set_placeholder() { + let sql = "REPLACE INTO t SET ?"; + + assert!(Parser::parse_sql(&GenericDialect {}, sql).is_err()); +} + +#[test] +fn test_replace_incomplete() { + let sql = r#"REPLACE"#; + + assert!(Parser::parse_sql(&MySqlDialect {}, sql).is_err()); +} diff --git a/src/parser/tokens.rs b/src/parser/tokens.rs new file mode 100644 index 000000000..94aeb60e0 --- /dev/null +++ b/src/parser/tokens.rs @@ -0,0 +1,359 @@ +use super::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + /// Consume the parser and return its underlying token buffer + pub fn into_tokens(self) -> Vec { + self.tokens + } + + /// Get the precedence of the next token + pub fn get_next_precedence(&self) -> Result { + self.dialect.get_next_precedence_default(self) + } + + /// Return the first non-whitespace token that has not yet been processed + /// (or None if reached end-of-file) + pub fn peek_token(&self) -> TokenWithSpan { + self.peek_nth_token(0) + } + + /// Returns the `N` next non-whitespace tokens that have not yet been + /// processed. + /// + /// Example: + /// ```rust + /// # use sqlparser::dialect::GenericDialect; + /// # use sqlparser::parser::Parser; + /// # use sqlparser::keywords::Keyword; + /// # use sqlparser::tokenizer::{Token, Word}; + /// let dialect = GenericDialect {}; + /// let mut parser = Parser::new(&dialect).try_with_sql("ORDER BY foo, bar").unwrap(); + /// + /// // Note that Rust infers the number of tokens to peek based on the + /// // length of the slice pattern! + /// assert!(matches!( + /// parser.peek_tokens(), + /// [ + /// Token::Word(Word { keyword: Keyword::ORDER, .. }), + /// Token::Word(Word { keyword: Keyword::BY, .. }), + /// ] + /// )); + /// ``` + pub fn peek_tokens(&self) -> [Token; N] { + self.peek_tokens_with_location() + .map(|with_loc| with_loc.token) + } + + /// Returns the `N` next non-whitespace tokens with locations that have not + /// yet been processed. + /// + /// See [`Self::peek_token`] for an example. + pub fn peek_tokens_with_location(&self) -> [TokenWithSpan; N] { + let mut index = self.index; + core::array::from_fn(|_| loop { + let token = self.tokens.get(index); + index += 1; + if let Some(TokenWithSpan { + token: Token::Whitespace(_), + span: _, + }) = token + { + continue; + } + break token.cloned().unwrap_or(TokenWithSpan { + token: Token::EOF, + span: Span::empty(), + }); + }) + } + + /// Return nth non-whitespace token that has not yet been processed + pub fn peek_nth_token(&self, mut n: usize) -> TokenWithSpan { + let mut index = self.index; + loop { + index += 1; + match self.tokens.get(index - 1) { + Some(TokenWithSpan { + token: Token::Whitespace(_), + span: _, + }) => continue, + non_whitespace => { + if n == 0 { + return non_whitespace.cloned().unwrap_or(TokenWithSpan { + token: Token::EOF, + span: Span::empty(), + }); + } + n -= 1; + } + } + } + } + + /// Return the first token, possibly whitespace, that has not yet been processed + /// (or None if reached end-of-file). + pub fn peek_token_no_skip(&self) -> TokenWithSpan { + self.peek_nth_token_no_skip(0) + } + + /// Return nth token, possibly whitespace, that has not yet been processed. 
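+    ///
+    /// Unlike [`Self::peek_nth_token`], the index `n` here counts whitespace
+    /// tokens too, so for the input `a b` a hypothetical
+    /// `peek_nth_token_no_skip(1)` yields the whitespace between the two
+    /// words rather than `b`.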
+ pub fn peek_nth_token_no_skip(&self, n: usize) -> TokenWithSpan { + self.tokens + .get(self.index + n) + .cloned() + .unwrap_or(TokenWithSpan { + token: Token::EOF, + span: Span::empty(), + }) + } + + /// Look for all of the expected keywords in sequence, without consuming them + pub fn peek_keywords(&mut self, expected: &[Keyword]) -> bool { + let index = self.index; + let matched = self.parse_keywords(expected); + self.index = index; + matched + } + + /// Return the first non-whitespace token that has not yet been processed + /// (or None if reached end-of-file) and mark it as processed. OK to call + /// repeatedly after reaching EOF. + pub fn next_token(&mut self) -> TokenWithSpan { + loop { + self.index += 1; + match self.tokens.get(self.index - 1) { + Some(TokenWithSpan { + token: Token::Whitespace(_), + span: _, + }) => continue, + token => { + return token + .cloned() + .unwrap_or_else(|| TokenWithSpan::wrap(Token::EOF)) + } + } + } + } + + /// Return the first unprocessed token, possibly whitespace. + pub fn next_token_no_skip(&mut self) -> Option<&TokenWithSpan> { + self.index += 1; + self.tokens.get(self.index - 1) + } + + /// Push back the last one non-whitespace token. Must be called after + /// `next_token()`, otherwise might panic. OK to call after + /// `next_token()` indicates an EOF. + pub fn prev_token(&mut self) { + loop { + assert!(self.index > 0); + self.index -= 1; + if let Some(TokenWithSpan { + token: Token::Whitespace(_), + span: _, + }) = self.tokens.get(self.index) + { + continue; + } + return; + } + } + + /// Report `found` was encountered instead of `expected` + pub fn expected(&self, expected: &str, found: TokenWithSpan) -> Result { + parser_err!( + format!("Expected: {expected}, found: {found}"), + found.span.start + ) + } + + /// If the current token is the `expected` keyword, consume it and returns + /// true. Otherwise, no tokens are consumed and returns false. + #[must_use] + pub fn parse_keyword(&mut self, expected: Keyword) -> bool { + self.parse_keyword_token(expected).is_some() + } + + #[must_use] + pub fn parse_keyword_token(&mut self, expected: Keyword) -> Option { + match self.peek_token().token { + Token::Word(w) if expected == w.keyword => Some(self.next_token()), + _ => None, + } + } + + #[must_use] + pub fn peek_keyword(&mut self, expected: Keyword) -> bool { + matches!(self.peek_token().token, Token::Word(w) if expected == w.keyword) + } + + /// If the current token is the `expected` keyword followed by + /// specified tokens, consume them and returns true. + /// Otherwise, no tokens are consumed and returns false. + /// + /// Note that if the length of `tokens` is too long, this function will + /// not be efficient as it does a loop on the tokens with `peek_nth_token` + /// each time. + pub fn parse_keyword_with_tokens(&mut self, expected: Keyword, tokens: &[Token]) -> bool { + match self.peek_token().token { + Token::Word(w) if expected == w.keyword => { + for (idx, token) in tokens.iter().enumerate() { + if self.peek_nth_token(idx + 1).token != *token { + return false; + } + } + // consume all tokens + for _ in 0..(tokens.len() + 1) { + self.next_token(); + } + true + } + _ => false, + } + } + + /// If the current and subsequent tokens exactly match the `keywords` + /// sequence, consume them and returns true. 
Otherwise, no tokens are
+    /// consumed and returns false
+    #[must_use]
+    pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool {
+        let index = self.index;
+        for &keyword in keywords {
+            if !self.parse_keyword(keyword) {
+                // println!("parse_keywords aborting .. did not find {:?}", keyword);
+                // reset index and return immediately
+                self.index = index;
+                return false;
+            }
+        }
+        true
+    }
+
+    /// If the current token is one of the given `keywords`, consume the token
+    /// and return the keyword that matches. Otherwise, no tokens are consumed
+    /// and returns [`None`].
+    #[must_use]
+    pub fn parse_one_of_keywords(&mut self, keywords: &[Keyword]) -> Option<Keyword> {
+        match self.peek_token().token {
+            Token::Word(w) => {
+                keywords
+                    .iter()
+                    .find(|keyword| **keyword == w.keyword)
+                    .map(|keyword| {
+                        self.next_token();
+                        *keyword
+                    })
+            }
+            _ => None,
+        }
+    }
+
+    /// If the current token is one of the expected keywords, consume the token
+    /// and return the keyword that matches. Otherwise, return an error.
+    pub fn expect_one_of_keywords(&mut self, keywords: &[Keyword]) -> Result<Keyword, ParserError> {
+        if let Some(keyword) = self.parse_one_of_keywords(keywords) {
+            Ok(keyword)
+        } else {
+            let keywords: Vec<String> = keywords.iter().map(|x| format!("{x:?}")).collect();
+            self.expected(
+                &format!("one of {}", keywords.join(" or ")),
+                self.peek_token(),
+            )
+        }
+    }
+
+    /// If the current token is the `expected` keyword, consume the token.
+    /// Otherwise, return an error.
+    pub fn expect_keyword(&mut self, expected: Keyword) -> Result<TokenWithSpan, ParserError> {
+        if let Some(token) = self.parse_keyword_token(expected) {
+            Ok(token)
+        } else {
+            self.expected(format!("{:?}", &expected).as_str(), self.peek_token())
+        }
+    }
+
+    /// If the current and subsequent tokens exactly match the `keywords`
+    /// sequence, consume them and returns Ok. Otherwise, return an Error.
+    pub fn expect_keywords(&mut self, expected: &[Keyword]) -> Result<(), ParserError> {
+        for &kw in expected {
+            self.expect_keyword(kw)?;
+        }
+        Ok(())
+    }
+
+    /// Consume the next token if it matches the expected token, otherwise return false
+    #[must_use]
+    pub fn consume_token(&mut self, expected: &Token) -> bool {
+        if self.peek_token() == *expected {
+            self.next_token();
+            true
+        } else {
+            false
+        }
+    }
+
+    /// If the current and subsequent tokens exactly match the `tokens`
+    /// sequence, consume them and returns true. Otherwise, no tokens are
+    /// consumed and returns false
+    #[must_use]
+    pub fn consume_tokens(&mut self, tokens: &[Token]) -> bool {
+        let index = self.index;
+        for token in tokens {
+            if !self.consume_token(token) {
+                self.index = index;
+                return false;
+            }
+        }
+        true
+    }
+
+    /// Bail out if the current token is not the expected token, or consume it if it is
+    pub fn expect_token(&mut self, expected: &Token) -> Result<TokenWithSpan, ParserError> {
+        if self.peek_token() == *expected {
+            Ok(self.next_token())
+        } else {
+            self.expected(&expected.to_string(), self.peek_token())
+        }
+    }
+
+    /// Peek at the next token and determine if it is a temporal unit
+    /// like `second`.
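+    ///
+    /// For example, given the remaining input `MINUTE TO SECOND`, this
+    /// returns `true` without consuming anything; it is up to the caller to
+    /// decide whether to consume the unit afterwards.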
+ pub fn next_token_is_temporal_unit(&mut self) -> bool { + if let Token::Word(word) = self.peek_token().token { + matches!( + word.keyword, + Keyword::YEAR + | Keyword::MONTH + | Keyword::WEEK + | Keyword::DAY + | Keyword::HOUR + | Keyword::MINUTE + | Keyword::SECOND + | Keyword::CENTURY + | Keyword::DECADE + | Keyword::DOW + | Keyword::DOY + | Keyword::EPOCH + | Keyword::ISODOW + | Keyword::ISOYEAR + | Keyword::JULIAN + | Keyword::MICROSECOND + | Keyword::MICROSECONDS + | Keyword::MILLENIUM + | Keyword::MILLENNIUM + | Keyword::MILLISECOND + | Keyword::MILLISECONDS + | Keyword::NANOSECOND + | Keyword::NANOSECONDS + | Keyword::QUARTER + | Keyword::TIMEZONE + | Keyword::TIMEZONE_HOUR + | Keyword::TIMEZONE_MINUTE + ) + } else { + false + } + } +} diff --git a/src/parser/truncate.rs b/src/parser/truncate.rs new file mode 100644 index 000000000..b99c2d42a --- /dev/null +++ b/src/parser/truncate.rs @@ -0,0 +1,54 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_truncate(&mut self) -> Result { + let table = self.parse_keyword(Keyword::TABLE); + let only = self.parse_keyword(Keyword::ONLY); + + let table_names = self + .parse_comma_separated(|p| p.parse_object_name(false))? + .into_iter() + .map(|n| TruncateTableTarget { name: n }) + .collect(); + + let mut partitions = None; + if self.parse_keyword(Keyword::PARTITION) { + self.expect_token(&Token::LParen)?; + partitions = Some(self.parse_comma_separated(Parser::parse_expr)?); + self.expect_token(&Token::RParen)?; + } + + let mut identity = None; + let mut cascade = None; + + if dialect_of!(self is PostgreSqlDialect | GenericDialect) { + identity = if self.parse_keywords(&[Keyword::RESTART, Keyword::IDENTITY]) { + Some(TruncateIdentityOption::Restart) + } else if self.parse_keywords(&[Keyword::CONTINUE, Keyword::IDENTITY]) { + Some(TruncateIdentityOption::Continue) + } else { + None + }; + + cascade = if self.parse_keyword(Keyword::CASCADE) { + Some(TruncateCascadeOption::Cascade) + } else if self.parse_keyword(Keyword::RESTRICT) { + Some(TruncateCascadeOption::Restrict) + } else { + None + }; + }; + + let on_cluster = self.parse_optional_on_cluster()?; + + Ok(Statement::Truncate { + table_names, + partitions, + table, + only, + identity, + cascade, + on_cluster, + }) + } +} diff --git a/src/parser/uncache.rs b/src/parser/uncache.rs new file mode 100644 index 000000000..21d7a3a63 --- /dev/null +++ b/src/parser/uncache.rs @@ -0,0 +1,14 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + /// Parse a UNCACHE TABLE statement + pub fn parse_uncache_table(&mut self) -> Result { + self.expect_keyword(Keyword::TABLE)?; + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let table_name = self.parse_object_name(false)?; + Ok(Statement::UNCache { + table_name, + if_exists, + }) + } +} diff --git a/src/parser/unlisten.rs b/src/parser/unlisten.rs new file mode 100644 index 000000000..20b9e8df1 --- /dev/null +++ b/src/parser/unlisten.rs @@ -0,0 +1,18 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_unlisten(&mut self) -> Result { + let channel = if self.consume_token(&Token::Mul) { + Ident::new(Expr::Wildcard(AttachedToken::empty()).to_string()) + } else { + match self.parse_identifier(false) { + Ok(expr) => expr, + _ => { + self.prev_token(); + return self.expected("wildcard or identifier", self.peek_token()); + } + } + }; + Ok(Statement::UNLISTEN { channel }) + } +} diff --git a/src/parser/unload.rs b/src/parser/unload.rs new file mode 100644 index 000000000..8ba3e3fb2 --- /dev/null +++ 
b/src/parser/unload.rs @@ -0,0 +1,20 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_unload(&mut self) -> Result { + self.expect_token(&Token::LParen)?; + let query = self.parse_query()?; + self.expect_token(&Token::RParen)?; + + self.expect_keyword(Keyword::TO)?; + let to = self.parse_identifier(false)?; + + let with_options = self.parse_options(Keyword::WITH)?; + + Ok(Statement::Unload { + query, + to, + with: with_options, + }) + } +} diff --git a/src/parser/update.rs b/src/parser/update.rs new file mode 100644 index 000000000..7974f061d --- /dev/null +++ b/src/parser/update.rs @@ -0,0 +1,35 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_update(&mut self) -> Result { + let or = self.parse_conflict_clause(); + let table = self.parse_table_and_joins()?; + self.expect_keyword(Keyword::SET)?; + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + let from = if self.parse_keyword(Keyword::FROM) + && dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect | SQLiteDialect ) + { + Some(self.parse_table_and_joins()?) + } else { + None + }; + let selection = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + let returning = if self.parse_keyword(Keyword::RETURNING) { + Some(self.parse_comma_separated(Parser::parse_select_item)?) + } else { + None + }; + Ok(Statement::Update { + table, + assignments, + from, + selection, + returning, + or, + }) + } +} diff --git a/src/parser/use.rs b/src/parser/use.rs new file mode 100644 index 000000000..9b40499a1 --- /dev/null +++ b/src/parser/use.rs @@ -0,0 +1,77 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_use(&mut self) -> Result { + // Determine which keywords are recognized by the current dialect + let parsed_keyword = if dialect_of!(self is HiveDialect) { + // HiveDialect accepts USE DEFAULT; statement without any db specified + if self.parse_keyword(Keyword::DEFAULT) { + return Ok(Statement::Use(Use::Default)); + } + None // HiveDialect doesn't expect any other specific keyword after `USE` + } else if dialect_of!(self is DatabricksDialect) { + self.parse_one_of_keywords(&[Keyword::CATALOG, Keyword::DATABASE, Keyword::SCHEMA]) + } else if dialect_of!(self is SnowflakeDialect) { + self.parse_one_of_keywords(&[ + Keyword::DATABASE, + Keyword::SCHEMA, + Keyword::WAREHOUSE, + Keyword::ROLE, + Keyword::SECONDARY, + ]) + } else { + None // No specific keywords for other dialects, including GenericDialect + }; + + let result = if matches!(parsed_keyword, Some(Keyword::SECONDARY)) { + self.parse_secondary_roles()? 
+ } else { + let obj_name = self.parse_object_name(false)?; + match parsed_keyword { + Some(Keyword::CATALOG) => Use::Catalog(obj_name), + Some(Keyword::DATABASE) => Use::Database(obj_name), + Some(Keyword::SCHEMA) => Use::Schema(obj_name), + Some(Keyword::WAREHOUSE) => Use::Warehouse(obj_name), + Some(Keyword::ROLE) => Use::Role(obj_name), + _ => Use::Object(obj_name), + } + }; + + Ok(Statement::Use(result)) + } + + fn parse_secondary_roles(&mut self) -> Result { + self.expect_keyword(Keyword::ROLES)?; + if self.parse_keyword(Keyword::NONE) { + Ok(Use::SecondaryRoles(SecondaryRoles::None)) + } else if self.parse_keyword(Keyword::ALL) { + Ok(Use::SecondaryRoles(SecondaryRoles::All)) + } else { + let roles = self.parse_comma_separated(|parser| parser.parse_identifier(false))?; + Ok(Use::SecondaryRoles(SecondaryRoles::List(roles))) + } + } + + /// Parse a `SET ROLE` statement. Expects SET to be consumed already. + pub(crate) fn parse_set_role( + &mut self, + modifier: Option, + ) -> Result { + self.expect_keyword(Keyword::ROLE)?; + let context_modifier = match modifier { + Some(Keyword::LOCAL) => ContextModifier::Local, + Some(Keyword::SESSION) => ContextModifier::Session, + _ => ContextModifier::None, + }; + + let role_name = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_identifier(false)?) + }; + Ok(Statement::SetRole { + context_modifier, + role_name, + }) + } +} diff --git a/src/parser/value.rs b/src/parser/value.rs new file mode 100644 index 000000000..d08a06bad --- /dev/null +++ b/src/parser/value.rs @@ -0,0 +1,756 @@ +use super::*; + +use crate::parser_err; + +impl<'a> Parser<'a> { + /// Parse a literal value (numbers, strings, date/time, booleans) + pub fn parse_value(&mut self) -> Result { + let next_token = self.next_token(); + let span = next_token.span; + match next_token.token { + Token::Word(w) => match w.keyword { + Keyword::TRUE if self.dialect.supports_boolean_literals() => { + Ok(Value::Boolean(true)) + } + Keyword::FALSE if self.dialect.supports_boolean_literals() => { + Ok(Value::Boolean(false)) + } + Keyword::NULL => Ok(Value::Null), + Keyword::NoKeyword if w.quote_style.is_some() => match w.quote_style { + Some('"') => Ok(Value::DoubleQuotedString(w.value)), + Some('\'') => Ok(Value::SingleQuotedString(w.value)), + _ => self.expected( + "A value?", + TokenWithSpan { + token: Token::Word(w), + span, + }, + )?, + }, + _ => self.expected( + "a concrete value", + TokenWithSpan { + token: Token::Word(w), + span, + }, + ), + }, + // The call to n.parse() returns a bigdecimal when the + // bigdecimal feature is enabled, and is otherwise a no-op + // (i.e., it returns the input string). 
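+            // With the feature disabled, `Value::Number` therefore carries the
+            // numeric literal exactly as it was written in the source SQL.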
+            Token::Number(n, l) => Ok(Value::Number(Self::parse(n, span.start)?, l)),
+            Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
+            Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
+            Token::TripleSingleQuotedString(ref s) => {
+                Ok(Value::TripleSingleQuotedString(s.to_string()))
+            }
+            Token::TripleDoubleQuotedString(ref s) => {
+                Ok(Value::TripleDoubleQuotedString(s.to_string()))
+            }
+            Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())),
+            Token::SingleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::SingleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::DoubleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::DoubleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::TripleSingleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::TripleSingleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::TripleDoubleQuotedByteStringLiteral(ref s) => {
+                Ok(Value::TripleDoubleQuotedByteStringLiteral(s.clone()))
+            }
+            Token::SingleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::SingleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::DoubleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::DoubleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::TripleSingleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::TripleSingleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::TripleDoubleQuotedRawStringLiteral(ref s) => {
+                Ok(Value::TripleDoubleQuotedRawStringLiteral(s.clone()))
+            }
+            Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
+            Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
+            Token::UnicodeStringLiteral(ref s) => Ok(Value::UnicodeStringLiteral(s.to_string())),
+            Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
+            Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())),
+            tok @ Token::Colon | tok @ Token::AtSign => {
+                // Not calling self.parse_identifier(false)? because only in placeholders
+                // do we want to accept numbers as identifiers. This is because Snowflake
+                // allows numbers as placeholders.
+                let next_token = self.next_token();
+                let ident = match next_token.token {
+                    Token::Word(w) => Ok(w.to_ident(next_token.span)),
+                    Token::Number(w, false) => Ok(Ident::new(w)),
+                    _ => self.expected("placeholder", next_token),
+                }?;
+                let placeholder = tok.to_string() + &ident.value;
+                Ok(Value::Placeholder(placeholder))
+            }
+            unexpected => self.expected(
+                "a value",
+                TokenWithSpan {
+                    token: unexpected,
+                    span,
+                },
+            ),
+        }
+    }
+
+    pub fn parse_values(&mut self, allow_empty: bool) -> Result<Values, ParserError> {
+        let mut explicit_row = false;
+
+        let rows = self.parse_comma_separated(|parser| {
+            if parser.parse_keyword(Keyword::ROW) {
+                explicit_row = true;
+            }
+
+            parser.expect_token(&Token::LParen)?;
+            if allow_empty && parser.peek_token().token == Token::RParen {
+                parser.next_token();
+                Ok(vec![])
+            } else {
+                let exprs = parser.parse_comma_separated(Parser::parse_expr)?;
+                parser.expect_token(&Token::RParen)?;
+                Ok(exprs)
+            }
+        })?;
+        Ok(Values { explicit_row, rows })
+    }
+
+    /// Parse an unsigned numeric literal
+    pub fn parse_number_value(&mut self) -> Result<Value, ParserError> {
+        match self.parse_value()? {
+            v @ Value::Number(_, _) => Ok(v),
+            v @ Value::Placeholder(_) => Ok(v),
+            _ => {
+                self.prev_token();
+                self.expected("literal number", self.peek_token())
+            }
+        }
+    }
+
+    /// Parse a numeric literal as an expression.
Returns a [`Expr::UnaryOp`] if the number is signed, + /// otherwise returns a [`Expr::Value`] + pub fn parse_number(&mut self) -> Result { + let next_token = self.next_token(); + match next_token.token { + Token::Plus => Ok(Expr::UnaryOp { + op: UnaryOperator::Plus, + expr: Box::new(Expr::Value(self.parse_number_value()?)), + }), + Token::Minus => Ok(Expr::UnaryOp { + op: UnaryOperator::Minus, + expr: Box::new(Expr::Value(self.parse_number_value()?)), + }), + _ => { + self.prev_token(); + Ok(Expr::Value(self.parse_number_value()?)) + } + } + } + + pub(crate) fn parse_introduced_string_value(&mut self) -> Result { + let next_token = self.next_token(); + let span = next_token.span; + match next_token.token { + Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), + Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), + Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), + unexpected => self.expected( + "a string value", + TokenWithSpan { + token: unexpected, + span, + }, + ), + } + } + + /// Parse an unsigned literal integer/long + pub fn parse_literal_uint(&mut self) -> Result { + let next_token = self.next_token(); + match next_token.token { + Token::Number(s, _) => Self::parse::(s, next_token.span.start), + _ => self.expected("literal int", next_token), + } + } + + /// Parse a literal string + pub fn parse_literal_string(&mut self) -> Result { + let next_token = self.next_token(); + match next_token.token { + Token::Word(Word { + value, + keyword: Keyword::NoKeyword, + .. + }) => Ok(value), + Token::SingleQuotedString(s) => Ok(s), + Token::DoubleQuotedString(s) => Ok(s), + Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Ok(s) + } + Token::UnicodeStringLiteral(s) => Ok(s), + _ => self.expected("literal string", next_token), + } + } + + pub fn parse_enum_values(&mut self) -> Result, ParserError> { + self.expect_token(&Token::LParen)?; + let values = self.parse_comma_separated(|parser| { + let name = parser.parse_literal_string()?; + let e = if parser.consume_token(&Token::Eq) { + let value = parser.parse_number()?; + EnumMember::NamedValue(name, value) + } else { + EnumMember::Name(name) + }; + Ok(e) + })?; + self.expect_token(&Token::RParen)?; + + Ok(values) + } + + /// Parse datetime64 [1] + /// Syntax + /// ```sql + /// DateTime64(precision[, timezone]) + /// ``` + /// + /// [1]: https://clickhouse.com/docs/en/sql-reference/data-types/datetime64 + pub fn parse_datetime_64(&mut self) -> Result<(u64, Option), ParserError> { + self.expect_keyword(Keyword::DATETIME64)?; + self.expect_token(&Token::LParen)?; + let precision = self.parse_literal_uint()?; + let time_zone = if self.consume_token(&Token::Comma) { + Some(self.parse_literal_string()?) 
+ } else { + None + }; + self.expect_token(&Token::RParen)?; + Ok((precision, time_zone)) + } + + /// Parse a SQL datatype (in the context of a CREATE TABLE statement for example) + pub fn parse_data_type(&mut self) -> Result<DataType, ParserError> { + let (ty, trailing_bracket) = self.parse_data_type_helper()?; + if trailing_bracket.0 { + return parser_err!( + format!("unmatched > after parsing data type {ty}"), + self.peek_token() + ); + } + + Ok(ty) + } + + pub(crate) fn parse_data_type_helper( + &mut self, + ) -> Result<(DataType, MatchedTrailingBracket), ParserError> { + let next_token = self.next_token(); + let mut trailing_bracket: MatchedTrailingBracket = false.into(); + let mut data = match next_token.token { + Token::Word(w) => match w.keyword { + Keyword::BOOLEAN => Ok(DataType::Boolean), + Keyword::BOOL => Ok(DataType::Bool), + Keyword::FLOAT => Ok(DataType::Float(self.parse_optional_precision()?)), + Keyword::REAL => Ok(DataType::Real), + Keyword::FLOAT4 => Ok(DataType::Float4), + Keyword::FLOAT32 => Ok(DataType::Float32), + Keyword::FLOAT64 => Ok(DataType::Float64), + Keyword::FLOAT8 => Ok(DataType::Float8), + Keyword::DOUBLE => { + if self.parse_keyword(Keyword::PRECISION) { + Ok(DataType::DoublePrecision) + } else { + Ok(DataType::Double) + } + } + Keyword::TINYINT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedTinyInt(optional_precision?)) + } else { + Ok(DataType::TinyInt(optional_precision?)) + } + } + Keyword::INT2 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt2(optional_precision?)) + } else { + Ok(DataType::Int2(optional_precision?)) + } + } + Keyword::SMALLINT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedSmallInt(optional_precision?)) + } else { + Ok(DataType::SmallInt(optional_precision?)) + } + } + Keyword::MEDIUMINT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedMediumInt(optional_precision?)) + } else { + Ok(DataType::MediumInt(optional_precision?)) + } + } + Keyword::INT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt(optional_precision?)) + } else { + Ok(DataType::Int(optional_precision?)) + } + } + Keyword::INT4 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt4(optional_precision?)) + } else { + Ok(DataType::Int4(optional_precision?)) + } + } + Keyword::INT8 => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInt8(optional_precision?)) + } else { + Ok(DataType::Int8(optional_precision?)) + } + } + Keyword::INT16 => Ok(DataType::Int16), + Keyword::INT32 => Ok(DataType::Int32), + Keyword::INT64 => Ok(DataType::Int64), + Keyword::INT128 => Ok(DataType::Int128), + Keyword::INT256 => Ok(DataType::Int256), + Keyword::INTEGER => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { + Ok(DataType::UnsignedInteger(optional_precision?)) + } else { + Ok(DataType::Integer(optional_precision?)) + } + } + Keyword::BIGINT => { + let optional_precision = self.parse_optional_precision(); + if self.parse_keyword(Keyword::UNSIGNED) { +
Ok(DataType::UnsignedBigInt(optional_precision?)) + } else { + Ok(DataType::BigInt(optional_precision?)) + } + } + Keyword::UINT8 => Ok(DataType::UInt8), + Keyword::UINT16 => Ok(DataType::UInt16), + Keyword::UINT32 => Ok(DataType::UInt32), + Keyword::UINT64 => Ok(DataType::UInt64), + Keyword::UINT128 => Ok(DataType::UInt128), + Keyword::UINT256 => Ok(DataType::UInt256), + Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)), + Keyword::NVARCHAR => { + Ok(DataType::Nvarchar(self.parse_optional_character_length()?)) + } + Keyword::CHARACTER => { + if self.parse_keyword(Keyword::VARYING) { + Ok(DataType::CharacterVarying( + self.parse_optional_character_length()?, + )) + } else if self.parse_keywords(&[Keyword::LARGE, Keyword::OBJECT]) { + Ok(DataType::CharacterLargeObject( + self.parse_optional_precision()?, + )) + } else { + Ok(DataType::Character(self.parse_optional_character_length()?)) + } + } + Keyword::CHAR => { + if self.parse_keyword(Keyword::VARYING) { + Ok(DataType::CharVarying( + self.parse_optional_character_length()?, + )) + } else if self.parse_keywords(&[Keyword::LARGE, Keyword::OBJECT]) { + Ok(DataType::CharLargeObject(self.parse_optional_precision()?)) + } else { + Ok(DataType::Char(self.parse_optional_character_length()?)) + } + } + Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)), + Keyword::BINARY => Ok(DataType::Binary(self.parse_optional_precision()?)), + Keyword::VARBINARY => Ok(DataType::Varbinary(self.parse_optional_precision()?)), + Keyword::BLOB => Ok(DataType::Blob(self.parse_optional_precision()?)), + Keyword::TINYBLOB => Ok(DataType::TinyBlob), + Keyword::MEDIUMBLOB => Ok(DataType::MediumBlob), + Keyword::LONGBLOB => Ok(DataType::LongBlob), + Keyword::BYTES => Ok(DataType::Bytes(self.parse_optional_precision()?)), + Keyword::BIT => { + if self.parse_keyword(Keyword::VARYING) { + Ok(DataType::BitVarying(self.parse_optional_precision()?)) + } else { + Ok(DataType::Bit(self.parse_optional_precision()?)) + } + } + Keyword::UUID => Ok(DataType::Uuid), + Keyword::DATE => Ok(DataType::Date), + Keyword::DATE32 => Ok(DataType::Date32), + Keyword::DATETIME => Ok(DataType::Datetime(self.parse_optional_precision()?)), + Keyword::DATETIME64 => { + self.prev_token(); + let (precision, time_zone) = self.parse_datetime_64()?; + Ok(DataType::Datetime64(precision, time_zone)) + } + Keyword::TIMESTAMP => { + let precision = self.parse_optional_precision()?; + let tz = if self.parse_keyword(Keyword::WITH) { + self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; + TimezoneInfo::WithTimeZone + } else if self.parse_keyword(Keyword::WITHOUT) { + self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; + TimezoneInfo::WithoutTimeZone + } else { + TimezoneInfo::None + }; + Ok(DataType::Timestamp(precision, tz)) + } + Keyword::TIMESTAMPTZ => Ok(DataType::Timestamp( + self.parse_optional_precision()?, + TimezoneInfo::Tz, + )), + Keyword::TIME => { + let precision = self.parse_optional_precision()?; + let tz = if self.parse_keyword(Keyword::WITH) { + self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; + TimezoneInfo::WithTimeZone + } else if self.parse_keyword(Keyword::WITHOUT) { + self.expect_keywords(&[Keyword::TIME, Keyword::ZONE])?; + TimezoneInfo::WithoutTimeZone + } else { + TimezoneInfo::None + }; + Ok(DataType::Time(precision, tz)) + } + Keyword::TIMETZ => Ok(DataType::Time( + self.parse_optional_precision()?, + TimezoneInfo::Tz, + )), + // Interval types can be followed by a complicated interval + // qualifier that we don't 
currently support. See + // parse_interval for a taste. + Keyword::INTERVAL => Ok(DataType::Interval), + Keyword::JSON => Ok(DataType::JSON), + Keyword::JSONB => Ok(DataType::JSONB), + Keyword::REGCLASS => Ok(DataType::Regclass), + Keyword::STRING => Ok(DataType::String(self.parse_optional_precision()?)), + Keyword::FIXEDSTRING => { + self.expect_token(&Token::LParen)?; + let character_length = self.parse_literal_uint()?; + self.expect_token(&Token::RParen)?; + Ok(DataType::FixedString(character_length)) + } + Keyword::TEXT => Ok(DataType::Text), + Keyword::TINYTEXT => Ok(DataType::TinyText), + Keyword::MEDIUMTEXT => Ok(DataType::MediumText), + Keyword::LONGTEXT => Ok(DataType::LongText), + Keyword::BYTEA => Ok(DataType::Bytea), + Keyword::NUMERIC => Ok(DataType::Numeric( + self.parse_exact_number_optional_precision_scale()?, + )), + Keyword::DECIMAL => Ok(DataType::Decimal( + self.parse_exact_number_optional_precision_scale()?, + )), + Keyword::DEC => Ok(DataType::Dec( + self.parse_exact_number_optional_precision_scale()?, + )), + Keyword::BIGNUMERIC => Ok(DataType::BigNumeric( + self.parse_exact_number_optional_precision_scale()?, + )), + Keyword::BIGDECIMAL => Ok(DataType::BigDecimal( + self.parse_exact_number_optional_precision_scale()?, + )), + Keyword::ENUM => Ok(DataType::Enum(self.parse_enum_values()?, None)), + Keyword::ENUM8 => Ok(DataType::Enum(self.parse_enum_values()?, Some(8))), + Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))), + Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), + Keyword::ARRAY => { + if dialect_of!(self is SnowflakeDialect) { + Ok(DataType::Array(ArrayElemTypeDef::None)) + } else if dialect_of!(self is ClickHouseDialect) { + Ok(self.parse_sub_type(|internal_type| { + DataType::Array(ArrayElemTypeDef::Parenthesis(internal_type)) + })?) + } else { + self.expect_token(&Token::Lt)?; + let (inside_type, _trailing_bracket) = self.parse_data_type_helper()?; + trailing_bracket = self.expect_closing_angle_bracket(_trailing_bracket)?; + Ok(DataType::Array(ArrayElemTypeDef::AngleBracket(Box::new( + inside_type, + )))) + } + } + Keyword::STRUCT if dialect_of!(self is DuckDbDialect) => { + self.prev_token(); + let field_defs = self.parse_duckdb_struct_type_def()?; + Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses)) + } + Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => { + self.prev_token(); + let (field_defs, _trailing_bracket) = + self.parse_struct_type_def(Self::parse_struct_field_def)?; + trailing_bracket = _trailing_bracket; + Ok(DataType::Struct( + field_defs, + StructBracketKind::AngleBrackets, + )) + } + Keyword::UNION if dialect_of!(self is DuckDbDialect | GenericDialect) => { + self.prev_token(); + let fields = self.parse_union_type_def()?; + Ok(DataType::Union(fields)) + } + Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Ok(self.parse_sub_type(DataType::Nullable)?) + } + Keyword::LOWCARDINALITY if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Ok(self.parse_sub_type(DataType::LowCardinality)?) 
+ } + Keyword::MAP if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.prev_token(); + let (key_data_type, value_data_type) = self.parse_click_house_map_def()?; + Ok(DataType::Map( + Box::new(key_data_type), + Box::new(value_data_type), + )) + } + Keyword::NESTED if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.expect_token(&Token::LParen)?; + let field_defs = self.parse_comma_separated(Parser::parse_column_def)?; + self.expect_token(&Token::RParen)?; + Ok(DataType::Nested(field_defs)) + } + Keyword::TUPLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + self.prev_token(); + let field_defs = self.parse_click_house_tuple_def()?; + Ok(DataType::Tuple(field_defs)) + } + Keyword::TRIGGER => Ok(DataType::Trigger), + _ => { + self.prev_token(); + let type_name = self.parse_object_name(false)?; + if let Some(modifiers) = self.parse_optional_type_modifiers()? { + Ok(DataType::Custom(type_name, modifiers)) + } else { + Ok(DataType::Custom(type_name, vec![])) + } + } + }, + _ => self.expected("a data type name", next_token), + }?; + + // Parse array data types. Note: this is postgresql-specific and different from + // Keyword::ARRAY syntax from above + while self.consume_token(&Token::LBracket) { + let size = if dialect_of!(self is GenericDialect | DuckDbDialect | PostgreSqlDialect) { + self.maybe_parse(|p| p.parse_literal_uint())? + } else { + None + }; + self.expect_token(&Token::RBracket)?; + data = DataType::Array(ArrayElemTypeDef::SquareBracket(Box::new(data), size)) + } + Ok((data, trailing_bracket)) + } + + pub fn parse_exact_number_optional_precision_scale( + &mut self, + ) -> Result<ExactNumberInfo, ParserError> { + if self.consume_token(&Token::LParen) { + let precision = self.parse_literal_uint()?; + let scale = if self.consume_token(&Token::Comma) { + Some(self.parse_literal_uint()?)
+ } else { + None + }; + + self.expect_token(&Token::RParen)?; + + match scale { + None => Ok(ExactNumberInfo::Precision(precision)), + Some(scale) => Ok(ExactNumberInfo::PrecisionAndScale(precision, scale)), + } + } else { + Ok(ExactNumberInfo::None) + } + } + + pub fn parse_optional_character_length( + &mut self, + ) -> Result<Option<CharacterLength>, ParserError> { + if self.consume_token(&Token::LParen) { + let character_length = self.parse_character_length()?; + self.expect_token(&Token::RParen)?; + Ok(Some(character_length)) + } else { + Ok(None) + } + } + + pub fn parse_optional_precision(&mut self) -> Result<Option<u64>, ParserError> { + if self.consume_token(&Token::LParen) { + let n = self.parse_literal_uint()?; + self.expect_token(&Token::RParen)?; + Ok(Some(n)) + } else { + Ok(None) + } + } + + pub fn parse_optional_type_modifiers(&mut self) -> Result<Option<Vec<String>>, ParserError> { + if self.consume_token(&Token::LParen) { + let mut modifiers = Vec::new(); + loop { + let next_token = self.next_token(); + match next_token.token { + Token::Word(w) => modifiers.push(w.to_string()), + Token::Number(n, _) => modifiers.push(n), + Token::SingleQuotedString(s) => modifiers.push(s), + + Token::Comma => { + continue; + } + Token::RParen => { + break; + } + _ => self.expected("type modifiers", next_token)?, + } + } + + Ok(Some(modifiers)) + } else { + Ok(None) + } + } + + pub fn parse_string_values(&mut self) -> Result<Vec<String>, ParserError> { + self.expect_token(&Token::LParen)?; + let mut values = Vec::new(); + loop { + let next_token = self.next_token(); + match next_token.token { + Token::SingleQuotedString(value) => values.push(value), + _ => self.expected("a string", next_token)?, + } + let next_token = self.next_token(); + match next_token.token { + Token::Comma => (), + Token::RParen => break, + _ => self.expected(", or )", next_token)?, + } + } + Ok(values) + } + + /// Parse a field definition in a [struct] or [tuple]. + /// Syntax: + /// + /// ```sql + /// [field_name] field_type + /// ``` + /// + /// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type + /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + pub(crate) fn parse_struct_field_def( + &mut self, + ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { + // Look beyond the next item to infer whether both field name + // and type are specified. + let is_anonymous_field = !matches!( + (self.peek_nth_token(0).token, self.peek_nth_token(1).token), + (Token::Word(_), Token::Word(_)) + ); + + let field_name = if is_anonymous_field { + None + } else { + Some(self.parse_identifier(false)?) + }; + + let (field_type, trailing_bracket) = self.parse_data_type_helper()?; + + Ok(( + StructField { + field_name, + field_type, + }, + trailing_bracket, + )) + } + + /// Parse a parenthesized sub data type + fn parse_sub_type<F>(&mut self, parent_type: F) -> Result<DataType, ParserError> + where + F: FnOnce(Box<DataType>) -> DataType, + { + self.expect_token(&Token::LParen)?; + let inside_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + Ok(parent_type(inside_type.into())) + } + + /// For nested types that use the angle bracket syntax, this matches either + /// `>`, `>>` or nothing depending on which variant is expected (specified by the previously + /// matched `trailing_bracket` argument). It returns whether there is a trailing + /// `>` left to be matched - (i.e. if '>>' was matched).
+ pub(crate) fn expect_closing_angle_bracket( + &mut self, + trailing_bracket: MatchedTrailingBracket, + ) -> Result<MatchedTrailingBracket, ParserError> { + let trailing_bracket = if !trailing_bracket.0 { + match self.peek_token().token { + Token::Gt => { + self.next_token(); + false.into() + } + Token::ShiftRight => { + self.next_token(); + true.into() + } + _ => return self.expected(">", self.peek_token()), + } + } else { + false.into() + }; + + Ok(trailing_bracket) + } + + /// DuckDB specific: Parse a Union type definition as a sequence of field-value pairs [1]. + /// + /// Syntax: + /// + /// ```sql + /// UNION(field_name field_type[,...]) + /// ``` + /// + /// [1]: https://duckdb.org/docs/sql/data_types/union.html + fn parse_union_type_def(&mut self) -> Result<Vec<UnionField>, ParserError> { + self.expect_keyword(Keyword::UNION)?; + + self.expect_token(&Token::LParen)?; + + let fields = self.parse_comma_separated(|p| { + Ok(UnionField { + field_name: p.parse_identifier(false)?, + field_type: p.parse_data_type()?, + }) + })?; + + self.expect_token(&Token::RParen)?; + + Ok(fields) + } +} diff --git a/src/parser/window.rs b/src/parser/window.rs new file mode 100644 index 000000000..59b56526e --- /dev/null +++ b/src/parser/window.rs @@ -0,0 +1,105 @@ +use crate::parser::*; + +impl<'a> Parser<'a> { + pub fn parse_named_window(&mut self) -> Result<NamedWindowDefinition, ParserError> { + let ident = self.parse_identifier(false)?; + self.expect_keyword(Keyword::AS)?; + + let window_expr = if self.consume_token(&Token::LParen) { + NamedWindowExpr::WindowSpec(self.parse_window_spec()?) + } else if self.dialect.supports_window_clause_named_window_reference() { + NamedWindowExpr::NamedWindow(self.parse_identifier(false)?) + } else { + return self.expected("(", self.peek_token()); + }; + + Ok(NamedWindowDefinition(ident, window_expr)) + } + + pub fn parse_window_spec(&mut self) -> Result<WindowSpec, ParserError> { + let window_name = match self.peek_token().token { + Token::Word(word) if word.keyword == Keyword::NoKeyword => { + self.parse_optional_indent()? + } + _ => None, + }; + + let partition_by = if self.parse_keywords(&[Keyword::PARTITION, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_expr)? + } else { + vec![] + }; + let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + self.parse_comma_separated(Parser::parse_order_by_expr)?
+ } else { + vec![] + }; + + let window_frame = if !self.consume_token(&Token::RParen) { + let window_frame = self.parse_window_frame()?; + self.expect_token(&Token::RParen)?; + Some(window_frame) + } else { + None + }; + Ok(WindowSpec { + window_name, + partition_by, + order_by, + window_frame, + }) + } + + pub fn parse_window_frame(&mut self) -> Result<WindowFrame, ParserError> { + let units = self.parse_window_frame_units()?; + let (start_bound, end_bound) = if self.parse_keyword(Keyword::BETWEEN) { + let start_bound = self.parse_window_frame_bound()?; + self.expect_keyword(Keyword::AND)?; + let end_bound = Some(self.parse_window_frame_bound()?); + (start_bound, end_bound) + } else { + (self.parse_window_frame_bound()?, None) + }; + Ok(WindowFrame { + units, + start_bound, + end_bound, + }) + } + + pub fn parse_window_frame_units(&mut self) -> Result<WindowFrameUnits, ParserError> { + let next_token = self.next_token(); + match &next_token.token { + Token::Word(w) => match w.keyword { + Keyword::ROWS => Ok(WindowFrameUnits::Rows), + Keyword::RANGE => Ok(WindowFrameUnits::Range), + Keyword::GROUPS => Ok(WindowFrameUnits::Groups), + _ => self.expected("ROWS, RANGE, GROUPS", next_token)?, + }, + _ => self.expected("ROWS, RANGE, GROUPS", next_token), + } + } + + /// Parse `CURRENT ROW` or `{ <positive number> | UNBOUNDED } { PRECEDING | FOLLOWING }` + pub fn parse_window_frame_bound(&mut self) -> Result<WindowFrameBound, ParserError> { + if self.parse_keywords(&[Keyword::CURRENT, Keyword::ROW]) { + Ok(WindowFrameBound::CurrentRow) + } else { + let rows = if self.parse_keyword(Keyword::UNBOUNDED) { + None + } else { + Some(Box::new(match self.peek_token().token { + Token::SingleQuotedString(_) => self.parse_interval()?, + _ => self.parse_expr()?, + })) + }; + if self.parse_keyword(Keyword::PRECEDING) { + Ok(WindowFrameBound::Preceding(rows)) + } else if self.parse_keyword(Keyword::FOLLOWING) { + Ok(WindowFrameBound::Following(rows)) + } else { + self.expected("PRECEDING or FOLLOWING", self.peek_token()) + } + } + } +} diff --git a/src/tokenizer.rs b/src/tokenizer.rs index aacfc16fa..cf4bb819f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -40,7 +40,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::DollarQuotedString; +use crate::ast::{DollarQuotedString, Ident}; use crate::dialect::Dialect; use crate::dialect::{ BigQueryDialect, DuckDbDialect, GenericDialect, MySqlDialect, PostgreSqlDialect, @@ -388,6 +388,14 @@ impl fmt::Display for Word { } impl Word { + pub fn to_ident(&self, span: Span) -> Ident { + Ident { + value: self.value.clone(), + quote_style: self.quote_style, + span, + } + } + fn matching_end_quote(ch: char) -> char { match ch { '"' => '"', // ANSI and most dialects From d1c90b23a0950a22c63b0f3d296dce09863e9c13 Mon Sep 17 00:00:00 2001 From: "Paul J. Davis" Date: Sat, 7 Dec 2024 18:32:30 -0600 Subject: [PATCH 2/2] Fix up lifetimes that can be elided You can't propose a 26k line diff when CI fails.
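For context, the mechanical change below is Rust's lifetime elision on impl blocks: when a named lifetime parameter is declared only to spell out the self type and is never otherwise referenced, it can be replaced with the anonymous lifetime `'_`. A minimal sketch of the before/after pattern on a toy type (not the real `Parser`; the clippy `needless_lifetimes` lint is a plausible culprit for the CI failure, though the patch doesn't say):

```rust
// A toy stand-in that, like the real parser, borrows its token buffer.
struct Toy<'a> {
    tokens: &'a [String],
}

// Before the patch, impl blocks named the lifetime explicitly:
//
//     impl<'a> Toy<'a> {
//         fn remaining(&self) -> usize { self.tokens.len() }
//     }
//
// After: the lifetime is elided with `'_`, with no change in meaning.
impl Toy<'_> {
    fn remaining(&self) -> usize {
        self.tokens.len()
    }
}

fn main() {
    let tokens = vec!["SELECT".to_owned(), "1".to_owned()];
    let toy = Toy { tokens: &tokens };
    assert_eq!(toy.remaining(), 2);
}
```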
--- src/parser/alter.rs | 2 +- src/parser/analyze.rs | 2 +- src/parser/assert.rs | 2 +- src/parser/assignment.rs | 2 +- src/parser/attach.rs | 2 +- src/parser/cache.rs | 2 +- src/parser/call.rs | 2 +- src/parser/close.rs | 2 +- src/parser/columns.rs | 2 +- src/parser/comment.rs | 2 +- src/parser/commit.rs | 2 +- src/parser/copy.rs | 2 +- src/parser/create.rs | 2 +- src/parser/deallocate.rs | 2 +- src/parser/declare.rs | 2 +- src/parser/delete.rs | 2 +- src/parser/dialects/bigquery.rs | 2 +- src/parser/dialects/clickhouse.rs | 2 +- src/parser/dialects/duckdb.rs | 2 +- src/parser/dialects/hive.rs | 2 +- src/parser/dialects/mssql.rs | 2 +- src/parser/dialects/postgresql.rs | 2 +- src/parser/dialects/snowflake.rs | 2 +- src/parser/dialects/sqlite.rs | 2 +- src/parser/dialects/utils.rs | 2 +- src/parser/discard.rs | 2 +- src/parser/drop.rs | 2 +- src/parser/end.rs | 2 +- src/parser/execute.rs | 2 +- src/parser/explain.rs | 2 +- src/parser/fetch.rs | 2 +- src/parser/flush.rs | 2 +- src/parser/grant.rs | 2 +- src/parser/identifier.rs | 2 +- src/parser/insert.rs | 2 +- src/parser/install.rs | 2 +- src/parser/kill.rs | 2 +- src/parser/listen.rs | 2 +- src/parser/load.rs | 2 +- src/parser/merge.rs | 2 +- src/parser/msck.rs | 2 +- src/parser/notify.rs | 2 +- src/parser/optimize.rs | 2 +- src/parser/options.rs | 2 +- src/parser/pragma.rs | 2 +- src/parser/prepare.rs | 2 +- src/parser/release.rs | 2 +- src/parser/replace.rs | 2 +- src/parser/revoke.rs | 2 +- src/parser/rollback.rs | 2 +- src/parser/savepoint.rs | 2 +- src/parser/select.rs | 2 +- src/parser/set.rs | 4 ++-- src/parser/show.rs | 2 +- src/parser/start.rs | 2 +- src/parser/tokens.rs | 2 +- src/parser/truncate.rs | 2 +- src/parser/uncache.rs | 2 +- src/parser/unlisten.rs | 2 +- src/parser/unload.rs | 2 +- src/parser/update.rs | 2 +- src/parser/use.rs | 2 +- src/parser/value.rs | 2 +- src/parser/window.rs | 2 +- 64 files changed, 65 insertions(+), 65 deletions(-) diff --git a/src/parser/alter.rs b/src/parser/alter.rs index cf6aaae9c..1c404d2c8 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -17,7 +17,7 @@ use alloc::vec; use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_alter(&mut self) -> Result<Statement, ParserError> { let object_type = self.expect_one_of_keywords(&[ Keyword::VIEW, diff --git a/src/parser/analyze.rs b/src/parser/analyze.rs index 239a0d43d..2bc4a5d17 100644 --- a/src/parser/analyze.rs +++ b/src/parser/analyze.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_analyze(&mut self) -> Result<Statement, ParserError> { self.expect_keyword(Keyword::TABLE)?; let table_name = self.parse_object_name(false)?; diff --git a/src/parser/assert.rs b/src/parser/assert.rs index 6dc4aaee6..735352ab0 100644 --- a/src/parser/assert.rs +++ b/src/parser/assert.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_assert(&mut self) -> Result<Statement, ParserError> { let condition = self.parse_expr()?; let message = if self.parse_keyword(Keyword::AS) { diff --git a/src/parser/assignment.rs b/src/parser/assignment.rs index 3467951e2..edfce42e2 100644 --- a/src/parser/assignment.rs +++ b/src/parser/assignment.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a `var = expr` assignment, used in an UPDATE statement pub fn parse_assignment(&mut self) -> Result<Assignment, ParserError> { let target = self.parse_assignment_target()?; diff --git a/src/parser/attach.rs b/src/parser/attach.rs index b2f437833..8b1cb9672 100644 --- a/src/parser/attach.rs +++ b/src/parser/attach.rs @@ 
-1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_attach_database(&mut self) -> Result<Statement, ParserError> { let database = self.parse_keyword(Keyword::DATABASE); let database_file_name = self.parse_expr()?; diff --git a/src/parser/cache.rs b/src/parser/cache.rs index 3d5c388d5..140ea0e5c 100644 --- a/src/parser/cache.rs +++ b/src/parser/cache.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a CACHE TABLE statement pub fn parse_cache_table(&mut self) -> Result<Statement, ParserError> { let (mut table_flag, mut options, mut has_as, mut query) = (None, vec![], false, None); diff --git a/src/parser/call.rs b/src/parser/call.rs index 3e0ae7a4e..9d101513d 100644 --- a/src/parser/call.rs +++ b/src/parser/call.rs @@ -2,7 +2,7 @@ use crate::parser::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a `CALL procedure_name(arg1, arg2, ...)` /// or `CALL procedure_name` statement pub fn parse_call(&mut self) -> Result<Statement, ParserError> { diff --git a/src/parser/close.rs b/src/parser/close.rs index dd6eef6ee..630c6fa6a 100644 --- a/src/parser/close.rs +++ b/src/parser/close.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_close(&mut self) -> Result<Statement, ParserError> { let cursor = if self.parse_keyword(Keyword::ALL) { CloseCursor::All diff --git a/src/parser/columns.rs b/src/parser/columns.rs index 012c84fd1..ad3c0caca 100644 --- a/src/parser/columns.rs +++ b/src/parser/columns.rs @@ -1,6 +1,6 @@ use super::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_columns(&mut self) -> Result<(Vec<ColumnDef>, Vec<TableConstraint>), ParserError> { let mut columns = vec![]; let mut constraints = vec![]; diff --git a/src/parser/comment.rs b/src/parser/comment.rs index 8c5bca13e..c55f16c99 100644 --- a/src/parser/comment.rs +++ b/src/parser/comment.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_comment(&mut self) -> Result<Statement, ParserError> { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); diff --git a/src/parser/commit.rs b/src/parser/commit.rs index 900649174..0b208732a 100644 --- a/src/parser/commit.rs +++ b/src/parser/commit.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_commit(&mut self) -> Result<Statement, ParserError> { Ok(Statement::Commit { chain: self.parse_commit_rollback_chain()?, diff --git a/src/parser/copy.rs b/src/parser/copy.rs index 3310bd510..1bc318a7b 100644 --- a/src/parser/copy.rs +++ b/src/parser/copy.rs @@ -2,7 +2,7 @@ use super::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a copy statement pub fn parse_copy(&mut self) -> Result<Statement, ParserError> { let source; diff --git a/src/parser/create.rs b/src/parser/create.rs index 91f618dbc..c6deab598 100644 --- a/src/parser/create.rs +++ b/src/parser/create.rs @@ -2,7 +2,7 @@ use super::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a SQL CREATE statement pub fn parse_create(&mut self) -> Result<Statement, ParserError> { let or_replace = self.parse_keywords(&[Keyword::OR, Keyword::REPLACE]); diff --git a/src/parser/deallocate.rs b/src/parser/deallocate.rs index afdb297b9..e93e6fcb4 100644 --- a/src/parser/deallocate.rs +++ b/src/parser/deallocate.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_deallocate(&mut self) -> Result<Statement, ParserError> { let prepare = self.parse_keyword(Keyword::PREPARE); let name = self.parse_identifier(false)?; diff --git a/src/parser/declare.rs b/src/parser/declare.rs index 2c2980457..c480510e4 100644 --- a/src/parser/declare.rs +++ 
b/src/parser/declare.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a `DECLARE` statement. /// /// ```sql diff --git a/src/parser/delete.rs b/src/parser/delete.rs index 5d2baf499..769500dd8 100644 --- a/src/parser/delete.rs +++ b/src/parser/delete.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_delete(&mut self) -> Result<Statement, ParserError> { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. diff --git a/src/parser/dialects/bigquery.rs b/src/parser/dialects/bigquery.rs index 37f4c452c..399e6d81a 100644 --- a/src/parser/dialects/bigquery.rs +++ b/src/parser/dialects/bigquery.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse `CREATE FUNCTION` for [BigQuery] /// /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement diff --git a/src/parser/dialects/clickhouse.rs b/src/parser/dialects/clickhouse.rs index 5fb4a6a32..61478d698 100644 --- a/src/parser/dialects/clickhouse.rs +++ b/src/parser/dialects/clickhouse.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse clickhouse [map] /// /// Syntax diff --git a/src/parser/dialects/duckdb.rs b/src/parser/dialects/duckdb.rs index c6a642ee8..2e8cc5a32 100644 --- a/src/parser/dialects/duckdb.rs +++ b/src/parser/dialects/duckdb.rs @@ -2,7 +2,7 @@ use crate::parser::*; // DuckDB related parsing -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_attach_duckdb_database_options( &mut self, ) -> Result<Vec<AttachDuckDBDatabaseOption>, ParserError> { diff --git a/src/parser/dialects/hive.rs b/src/parser/dialects/hive.rs index f8e8bc651..7c453518d 100644 --- a/src/parser/dialects/hive.rs +++ b/src/parser/dialects/hive.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse `CREATE FUNCTION` for [Hive] /// /// [Hive]: https://cwiki.apache.org/confluence/display/hive/languagemanual+ddl#LanguageManualDDL-Create/Drop/ReloadFunction diff --git a/src/parser/dialects/mssql.rs b/src/parser/dialects/mssql.rs index 45a32f236..d2024a333 100644 --- a/src/parser/dialects/mssql.rs +++ b/src/parser/dialects/mssql.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a [MsSql] `DECLARE` statement. /// /// Syntax: diff --git a/src/parser/dialects/postgresql.rs b/src/parser/dialects/postgresql.rs index d4f952a03..fd5c07e05 100644 --- a/src/parser/dialects/postgresql.rs +++ b/src/parser/dialects/postgresql.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse `CREATE FUNCTION` for [Postgres] /// /// [Postgres]: https://www.postgresql.org/docs/15/sql-createfunction.html diff --git a/src/parser/dialects/snowflake.rs b/src/parser/dialects/snowflake.rs index 6f7ed5f1a..b0ee4bae1 100644 --- a/src/parser/dialects/snowflake.rs +++ b/src/parser/dialects/snowflake.rs @@ -1,7 +1,7 @@ use crate::keywords::ALL_KEYWORDS; use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a [Snowflake] `DECLARE` statement. 
/// /// Syntax: diff --git a/src/parser/dialects/sqlite.rs b/src/parser/dialects/sqlite.rs index c84254973..ce4bf13d6 100644 --- a/src/parser/dialects/sqlite.rs +++ b/src/parser/dialects/sqlite.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// SQLite-specific `CREATE VIRTUAL TABLE` pub fn parse_create_virtual_table(&mut self) -> Result<Statement, ParserError> { self.expect_keyword(Keyword::TABLE)?; diff --git a/src/parser/dialects/utils.rs b/src/parser/dialects/utils.rs index ec656a164..8771fe9b9 100644 --- a/src/parser/dialects/utils.rs +++ b/src/parser/dialects/utils.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse the body of a `CREATE FUNCTION` specified as a string. /// e.g. `CREATE FUNCTION ... AS $$ body $$`. pub(crate) fn parse_create_function_body_string(&mut self) -> Result<Expr, ParserError> { diff --git a/src/parser/discard.rs b/src/parser/discard.rs index 5e81ac8e3..5aa2aec3f 100644 --- a/src/parser/discard.rs +++ b/src/parser/discard.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_discard(&mut self) -> Result<Statement, ParserError> { let object_type = if self.parse_keyword(Keyword::ALL) { DiscardObject::ALL diff --git a/src/parser/drop.rs b/src/parser/drop.rs index 6f515af7e..bc175b947 100644 --- a/src/parser/drop.rs +++ b/src/parser/drop.rs @@ -2,7 +2,7 @@ use super::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse statements of the DropTrigger type such as: /// /// ```sql diff --git a/src/parser/end.rs b/src/parser/end.rs index fd74eecf4..254b7b5ab 100644 --- a/src/parser/end.rs +++ b/src/parser/end.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_end(&mut self) -> Result<Statement, ParserError> { Ok(Statement::Commit { chain: self.parse_commit_rollback_chain()?, diff --git a/src/parser/execute.rs b/src/parser/execute.rs index b4a0d2c8c..90fd1504d 100644 --- a/src/parser/execute.rs +++ b/src/parser/execute.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_execute(&mut self) -> Result<Statement, ParserError> { let name = self.parse_object_name(false)?; diff --git a/src/parser/explain.rs b/src/parser/explain.rs index 9e24de0e0..81a445e79 100644 --- a/src/parser/explain.rs +++ b/src/parser/explain.rs @@ -1,6 +1,6 @@ use super::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_explain( &mut self, describe_alias: DescribeAlias, diff --git a/src/parser/fetch.rs b/src/parser/fetch.rs index d89b9445f..c1ac6c244 100644 --- a/src/parser/fetch.rs +++ b/src/parser/fetch.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { // FETCH [ direction { FROM | IN } ] cursor INTO target; pub fn parse_fetch_statement(&mut self) -> Result<Statement, ParserError> { let direction = if self.parse_keyword(Keyword::NEXT) { diff --git a/src/parser/flush.rs b/src/parser/flush.rs index 1818c4073..f89b648ca 100644 --- a/src/parser/flush.rs +++ b/src/parser/flush.rs @@ -2,7 +2,7 @@ use crate::parser::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_flush(&mut self) -> Result<Statement, ParserError> { let mut channel = None; let mut tables: Vec<ObjectName> = vec![]; diff --git a/src/parser/grant.rs b/src/parser/grant.rs index eed67346f..fee9975d7 100644 --- a/src/parser/grant.rs +++ b/src/parser/grant.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a GRANT statement. 
pub fn parse_grant(&mut self) -> Result<Statement, ParserError> { let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; diff --git a/src/parser/identifier.rs b/src/parser/identifier.rs index 98a2dab10..1d60f5ff4 100644 --- a/src/parser/identifier.rs +++ b/src/parser/identifier.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a simple one-word identifier (possibly quoted, possibly a keyword) /// /// The `in_table_clause` parameter indicates whether the identifier is a table in a FROM, JOIN, or diff --git a/src/parser/insert.rs b/src/parser/insert.rs index 782397e0e..3562a3237 100644 --- a/src/parser/insert.rs +++ b/src/parser/insert.rs @@ -1,6 +1,6 @@ use super::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse an INSERT statement, returning a `Box`ed SetExpr /// /// This is used to reduce the size of the stack frames in debug builds diff --git a/src/parser/install.rs b/src/parser/install.rs index 92f3c679a..d1dc4007a 100644 --- a/src/parser/install.rs +++ b/src/parser/install.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// `INSTALL [extension_name]` pub fn parse_install(&mut self) -> Result<Statement, ParserError> { let extension_name = self.parse_identifier(false)?; diff --git a/src/parser/kill.rs b/src/parser/kill.rs index b86d04b21..d4c39cdc7 100644 --- a/src/parser/kill.rs +++ b/src/parser/kill.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { // KILL [CONNECTION | QUERY | MUTATION] processlist_id pub fn parse_kill(&mut self) -> Result<Statement, ParserError> { let modifier_keyword = diff --git a/src/parser/listen.rs b/src/parser/listen.rs index bf87ab8ae..ef7f99ac7 100644 --- a/src/parser/listen.rs +++ b/src/parser/listen.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_listen(&mut self) -> Result<Statement, ParserError> { let channel = self.parse_identifier(false)?; Ok(Statement::LISTEN { channel }) diff --git a/src/parser/load.rs b/src/parser/load.rs index d458b4118..fca741607 100644 --- a/src/parser/load.rs +++ b/src/parser/load.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a SQL LOAD statement pub fn parse_load(&mut self) -> Result<Statement, ParserError> { if self.dialect.supports_load_extension() { diff --git a/src/parser/merge.rs b/src/parser/merge.rs index 62497ea35..acfaca5df 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_merge(&mut self) -> Result<Statement, ParserError> { let into = self.parse_keyword(Keyword::INTO); diff --git a/src/parser/msck.rs b/src/parser/msck.rs index 62aa218b4..1357c3f0e 100644 --- a/src/parser/msck.rs +++ b/src/parser/msck.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_msck(&mut self) -> Result<Statement, ParserError> { let repair = self.parse_keyword(Keyword::REPAIR); self.expect_keyword(Keyword::TABLE)?; diff --git a/src/parser/notify.rs b/src/parser/notify.rs index 3b80f6712..5aef89a0d 100644 --- a/src/parser/notify.rs +++ b/src/parser/notify.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_notify(&mut self) -> Result<Statement, ParserError> { let channel = self.parse_identifier(false)?; let payload = if self.consume_token(&Token::Comma) { diff --git a/src/parser/optimize.rs b/src/parser/optimize.rs index e61811db0..76753fa26 100644 --- a/src/parser/optimize.rs +++ b/src/parser/optimize.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// ```sql /// OPTIMIZE TABLE 
[db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] /// ``` diff --git a/src/parser/options.rs b/src/parser/options.rs index acddbfaae..1f50f65e7 100644 --- a/src/parser/options.rs +++ b/src/parser/options.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn maybe_parse_options( &mut self, keyword: Keyword, diff --git a/src/parser/pragma.rs b/src/parser/pragma.rs index 91a807940..5a6727da9 100644 --- a/src/parser/pragma.rs +++ b/src/parser/pragma.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] pub fn parse_pragma(&mut self) -> Result<Statement, ParserError> { let name = self.parse_object_name(false)?; diff --git a/src/parser/prepare.rs b/src/parser/prepare.rs index 795130781..0b0343a25 100644 --- a/src/parser/prepare.rs +++ b/src/parser/prepare.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_prepare(&mut self) -> Result<Statement, ParserError> { let name = self.parse_identifier(false)?; diff --git a/src/parser/release.rs b/src/parser/release.rs index e3c5aa56b..fd18a576b 100644 --- a/src/parser/release.rs +++ b/src/parser/release.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_release(&mut self) -> Result<Statement, ParserError> { let _ = self.parse_keyword(Keyword::SAVEPOINT); let name = self.parse_identifier(false)?; diff --git a/src/parser/replace.rs b/src/parser/replace.rs index 6519b2b0a..6ae8b78f5 100644 --- a/src/parser/replace.rs +++ b/src/parser/replace.rs @@ -2,7 +2,7 @@ use crate::parser::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a REPLACE statement pub fn parse_replace(&mut self) -> Result<Statement, ParserError> { if !dialect_of!(self is MySqlDialect | GenericDialect) { diff --git a/src/parser/revoke.rs b/src/parser/revoke.rs index a0c2f753a..235114767 100644 --- a/src/parser/revoke.rs +++ b/src/parser/revoke.rs @@ -2,7 +2,7 @@ use crate::parser::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a REVOKE statement pub fn parse_revoke(&mut self) -> Result<Statement, ParserError> { let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; diff --git a/src/parser/rollback.rs b/src/parser/rollback.rs index 1d44762d0..35872210a 100644 --- a/src/parser/rollback.rs +++ b/src/parser/rollback.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_rollback(&mut self) -> Result<Statement, ParserError> { let chain = self.parse_commit_rollback_chain()?; let savepoint = self.parse_rollback_savepoint()?; diff --git a/src/parser/savepoint.rs b/src/parser/savepoint.rs index a725e6caf..dca82b257 100644 --- a/src/parser/savepoint.rs +++ b/src/parser/savepoint.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_savepoint(&mut self) -> Result<Statement, ParserError> { let name = self.parse_identifier(false)?; Ok(Statement::Savepoint { name }) diff --git a/src/parser/select.rs b/src/parser/select.rs index 36614be8b..bcdd82233 100644 --- a/src/parser/select.rs +++ b/src/parser/select.rs @@ -2,7 +2,7 @@ use super::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a query expression, i.e. a `SELECT` statement optionally /// preceded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... 
methods, this one doesn't diff --git a/src/parser/set.rs b/src/parser/set.rs index 9194c7a57..df312fbfb 100644 --- a/src/parser/set.rs +++ b/src/parser/set.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_set(&mut self) -> Result<Statement, ParserError> { let modifier = self.parse_one_of_keywords(&[Keyword::SESSION, Keyword::LOCAL, Keyword::HIVEVAR]); @@ -18,7 +18,7 @@ impl<'a> Parser<'a> { && self.consume_token(&Token::LParen) { let variables = OneOrManyWithParens::Many( - self.parse_comma_separated(|parser: &mut Parser<'a>| { + self.parse_comma_separated(|parser: &mut Parser<'_>| { parser.parse_identifier(false) })? .into_iter() diff --git a/src/parser/show.rs b/src/parser/show.rs index af7e20432..f8b5f3a9a 100644 --- a/src/parser/show.rs +++ b/src/parser/show.rs @@ -1,6 +1,6 @@ use super::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_show(&mut self) -> Result<Statement, ParserError> { let terse = self.parse_keyword(Keyword::TERSE); let extended = self.parse_keyword(Keyword::EXTENDED); diff --git a/src/parser/start.rs b/src/parser/start.rs index fbceafb22..e0081c689 100644 --- a/src/parser/start.rs +++ b/src/parser/start.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_start_transaction(&mut self) -> Result<Statement, ParserError> { self.expect_keyword(Keyword::TRANSACTION)?; Ok(Statement::StartTransaction { diff --git a/src/parser/tokens.rs b/src/parser/tokens.rs index 94aeb60e0..21363206e 100644 --- a/src/parser/tokens.rs +++ b/src/parser/tokens.rs @@ -2,7 +2,7 @@ use super::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Consume the parser and return its underlying token buffer pub fn into_tokens(self) -> Vec<TokenWithSpan> { self.tokens diff --git a/src/parser/truncate.rs b/src/parser/truncate.rs index b99c2d42a..8eccdaa32 100644 --- a/src/parser/truncate.rs +++ b/src/parser/truncate.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_truncate(&mut self) -> Result<Statement, ParserError> { let table = self.parse_keyword(Keyword::TABLE); let only = self.parse_keyword(Keyword::ONLY); diff --git a/src/parser/uncache.rs b/src/parser/uncache.rs index 21d7a3a63..bca640a49 100644 --- a/src/parser/uncache.rs +++ b/src/parser/uncache.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse an UNCACHE TABLE statement pub fn parse_uncache_table(&mut self) -> Result<Statement, ParserError> { self.expect_keyword(Keyword::TABLE)?; diff --git a/src/parser/unlisten.rs b/src/parser/unlisten.rs index 20b9e8df1..29e321f6b 100644 --- a/src/parser/unlisten.rs +++ b/src/parser/unlisten.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_unlisten(&mut self) -> Result<Statement, ParserError> { let channel = if self.consume_token(&Token::Mul) { Ident::new(Expr::Wildcard(AttachedToken::empty()).to_string()) diff --git a/src/parser/unload.rs b/src/parser/unload.rs index 8ba3e3fb2..a696404e5 100644 --- a/src/parser/unload.rs +++ b/src/parser/unload.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_unload(&mut self) -> Result<Statement, ParserError> { self.expect_token(&Token::LParen)?; let query = self.parse_query()?; diff --git a/src/parser/update.rs b/src/parser/update.rs index 7974f061d..5ccbcd126 100644 --- a/src/parser/update.rs +++ b/src/parser/update.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_update(&mut self) -> Result<Statement, ParserError> { let or = self.parse_conflict_clause(); let table = self.parse_table_and_joins()?; diff --git a/src/parser/use.rs 
b/src/parser/use.rs index 9b40499a1..2605adf01 100644 --- a/src/parser/use.rs +++ b/src/parser/use.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_use(&mut self) -> Result<Statement, ParserError> { // Determine which keywords are recognized by the current dialect let parsed_keyword = if dialect_of!(self is HiveDialect) { diff --git a/src/parser/value.rs b/src/parser/value.rs index d08a06bad..369da917c 100644 --- a/src/parser/value.rs +++ b/src/parser/value.rs @@ -2,7 +2,7 @@ use super::*; use crate::parser_err; -impl<'a> Parser<'a> { +impl Parser<'_> { /// Parse a literal value (numbers, strings, date/time, booleans) pub fn parse_value(&mut self) -> Result<Value, ParserError> { let next_token = self.next_token(); diff --git a/src/parser/window.rs b/src/parser/window.rs index 59b56526e..9f6130b15 100644 --- a/src/parser/window.rs +++ b/src/parser/window.rs @@ -1,6 +1,6 @@ use crate::parser::*; -impl<'a> Parser<'a> { +impl Parser<'_> { pub fn parse_named_window(&mut self) -> Result<NamedWindowDefinition, ParserError> { let ident = self.parse_identifier(false)?; self.expect_keyword(Keyword::AS)?;
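As a usage note, the window parsing moved into src/parser/window.rs stays reachable through the crate's public entry point. A minimal sketch (not part of the patch; the table and column names are made up) that drives parse_named_window and parse_window_spec via a WINDOW clause:

```rust
use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;

fn main() {
    // The WINDOW clause defines `w`; the OVER clause refers to it by name,
    // and the frame exercises parse_window_frame/parse_window_frame_bound.
    let sql = "SELECT id, SUM(amount) OVER w FROM t \
               WINDOW w AS (PARTITION BY id ORDER BY ts \
               ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)";
    let statements = Parser::parse_sql(&GenericDialect {}, sql).unwrap();
    assert_eq!(statements.len(), 1);
    println!("{statements:#?}");
}
```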