Skip to content

Commit b6d188d

Browse files
committed
Extract dialect-related utilities from test_utils.rs and re-export
1 parent 6b574c3 commit b6d188d

File tree

4 files changed

+293
-259
lines changed

4 files changed

+293
-259
lines changed

src/parser/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14271,6 +14271,7 @@ impl Word {
1427114271
}
1427214272

1427314273
#[cfg(test)]
14274+
#[cfg(feature = "parser")]
1427414275
mod tests {
1427514276
use crate::test_utils::{all_dialects, TestedDialects};
1427614277

src/test_dialect_utils.rs

Lines changed: 285 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,285 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! This module contains internal utilities used for testing the library.
19+
//! While technically public, the library's users are not supposed to rely
20+
//! on this module, as it will change without notice.
21+
//
22+
// It's re-exported in `src/test_utils.rs` and used in integration tests
23+
// via `tests::test_utils::*`.
24+
25+
use core::fmt::Debug;
26+
27+
use crate::ast::*;
28+
use crate::dialect::*;
29+
use crate::parser::Parser;
30+
use crate::parser::{ParserError, ParserOptions};
31+
use crate::tokenizer::{Token, Tokenizer};
32+
33+
/// Tests use the methods on this struct to invoke the parser on one or
34+
/// multiple dialects.
35+
pub struct TestedDialects {
36+
pub dialects: Vec<Box<dyn Dialect>>,
37+
pub options: Option<ParserOptions>,
38+
pub recursion_limit: Option<usize>,
39+
}
40+
41+
impl TestedDialects {
42+
/// Create a TestedDialects with default options and the given dialects.
43+
pub fn new(dialects: Vec<Box<dyn Dialect>>) -> Self {
44+
Self {
45+
dialects,
46+
options: None,
47+
recursion_limit: None,
48+
}
49+
}
50+
51+
pub fn new_with_options(dialects: Vec<Box<dyn Dialect>>, options: ParserOptions) -> Self {
52+
Self {
53+
dialects,
54+
options: Some(options),
55+
recursion_limit: None,
56+
}
57+
}
58+
59+
pub fn with_recursion_limit(mut self, recursion_limit: usize) -> Self {
60+
self.recursion_limit = Some(recursion_limit);
61+
self
62+
}
63+
64+
/// Builds a parser for `dialect`, applying `self.options` and
/// `self.recursion_limit` (in that order) when they are set.
fn new_parser<'a>(&self, dialect: &'a dyn Dialect) -> Parser<'a> {
    let mut parser = Parser::new(dialect);

    if let Some(options) = self.options.as_ref() {
        parser = parser.with_options(options.clone());
    }

    if let Some(limit) = self.recursion_limit {
        parser = parser.with_recursion_limit(limit);
    }

    parser
}
80+
81+
/// Run the given function for all of `self.dialects`, assert that they
82+
/// return the same result, and return that result.
83+
pub fn one_of_identical_results<F, T: Debug + PartialEq>(&self, f: F) -> T
84+
where
85+
F: Fn(&dyn Dialect) -> T,
86+
{
87+
let parse_results = self.dialects.iter().map(|dialect| (dialect, f(&**dialect)));
88+
parse_results
89+
.fold(None, |s, (dialect, parsed)| {
90+
if let Some((prev_dialect, prev_parsed)) = s {
91+
assert_eq!(
92+
prev_parsed, parsed,
93+
"Parse results with {prev_dialect:?} are different from {dialect:?}"
94+
);
95+
}
96+
Some((dialect, parsed))
97+
})
98+
.expect("tested dialects cannot be empty")
99+
.1
100+
}
101+
102+
pub fn run_parser_method<F, T: Debug + PartialEq>(&self, sql: &str, f: F) -> T
103+
where
104+
F: Fn(&mut Parser) -> T,
105+
{
106+
self.one_of_identical_results(|dialect| {
107+
let mut parser = self.new_parser(dialect).try_with_sql(sql).unwrap();
108+
f(&mut parser)
109+
})
110+
}
111+
112+
/// Parses a single SQL string into multiple statements, ensuring
113+
/// the result is the same for all tested dialects.
114+
pub fn parse_sql_statements(&self, sql: &str) -> Result<Vec<Statement>, ParserError> {
115+
self.one_of_identical_results(|dialect| {
116+
let mut tokenizer = Tokenizer::new(dialect, sql);
117+
if let Some(options) = &self.options {
118+
tokenizer = tokenizer.with_unescape(options.unescape);
119+
}
120+
let tokens = tokenizer.tokenize()?;
121+
self.new_parser(dialect)
122+
.with_tokens(tokens)
123+
.parse_statements()
124+
})
125+
// To fail the `ensure_multiple_dialects_are_tested` test:
126+
// Parser::parse_sql(&**self.dialects.first().unwrap(), sql)
127+
}
128+
129+
/// Ensures that `sql` parses as a single [Statement] for all tested
130+
/// dialects.
131+
///
132+
/// In general, the canonical SQL should be the same (see crate
133+
/// documentation for rationale) and you should prefer the `verified_`
134+
/// variants in testing, such as [`verified_statement`] or
135+
/// [`verified_query`].
136+
///
137+
/// If `canonical` is non empty,this function additionally asserts
138+
/// that:
139+
///
140+
/// 1. parsing `sql` results in the same [`Statement`] as parsing
141+
/// `canonical`.
142+
///
143+
/// 2. re-serializing the result of parsing `sql` produces the same
144+
/// `canonical` sql string
145+
pub fn one_statement_parses_to(&self, sql: &str, canonical: &str) -> Statement {
146+
let mut statements = self.parse_sql_statements(sql).expect(sql);
147+
assert_eq!(statements.len(), 1);
148+
if !canonical.is_empty() && sql != canonical {
149+
assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
150+
}
151+
152+
let only_statement = statements.pop().unwrap();
153+
154+
if !canonical.is_empty() {
155+
assert_eq!(canonical, only_statement.to_string())
156+
}
157+
only_statement
158+
}
159+
160+
/// Ensures that `sql` parses as an [`Expr`], and that
161+
/// re-serializing the parse result produces canonical
162+
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
163+
let ast = self
164+
.run_parser_method(sql, |parser| parser.parse_expr())
165+
.unwrap();
166+
assert_eq!(canonical, &ast.to_string());
167+
ast
168+
}
169+
170+
/// Ensures that `sql` parses as a single [Statement], and that
171+
/// re-serializing the parse result produces the same `sql`
172+
/// string (is not modified after a serialization round-trip).
173+
pub fn verified_stmt(&self, sql: &str) -> Statement {
174+
self.one_statement_parses_to(sql, sql)
175+
}
176+
177+
/// Ensures that `sql` parses as a single [Query], and that
178+
/// re-serializing the parse result produces the same `sql`
179+
/// string (is not modified after a serialization round-trip).
180+
pub fn verified_query(&self, sql: &str) -> Query {
181+
match self.verified_stmt(sql) {
182+
Statement::Query(query) => *query,
183+
_ => panic!("Expected Query"),
184+
}
185+
}
186+
187+
/// Ensures that `sql` parses as a single [Query], and that
188+
/// re-serializing the parse result matches the given canonical
189+
/// sql string.
190+
pub fn verified_query_with_canonical(&self, query: &str, canonical: &str) -> Query {
191+
match self.one_statement_parses_to(query, canonical) {
192+
Statement::Query(query) => *query,
193+
_ => panic!("Expected Query"),
194+
}
195+
}
196+
197+
/// Ensures that `sql` parses as a single [Select], and that
198+
/// re-serializing the parse result produces the same `sql`
199+
/// string (is not modified after a serialization round-trip).
200+
pub fn verified_only_select(&self, query: &str) -> Select {
201+
match *self.verified_query(query).body {
202+
SetExpr::Select(s) => *s,
203+
_ => panic!("Expected SetExpr::Select"),
204+
}
205+
}
206+
207+
/// Ensures that `sql` parses as a single [`Select`], and that additionally:
208+
///
209+
/// 1. parsing `sql` results in the same [`Statement`] as parsing
210+
/// `canonical`.
211+
///
212+
/// 2. re-serializing the result of parsing `sql` produces the same
213+
/// `canonical` sql string
214+
pub fn verified_only_select_with_canonical(&self, query: &str, canonical: &str) -> Select {
215+
let q = match self.one_statement_parses_to(query, canonical) {
216+
Statement::Query(query) => *query,
217+
_ => panic!("Expected Query"),
218+
};
219+
match *q.body {
220+
SetExpr::Select(s) => *s,
221+
_ => panic!("Expected SetExpr::Select"),
222+
}
223+
}
224+
225+
/// Ensures that `sql` parses as an [`Expr`], and that
226+
/// re-serializing the parse result produces the same `sql`
227+
/// string (is not modified after a serialization round-trip).
228+
pub fn verified_expr(&self, sql: &str) -> Expr {
229+
self.expr_parses_to(sql, sql)
230+
}
231+
232+
/// Check that the tokenizer returns the expected tokens for the given SQL.
233+
pub fn tokenizes_to(&self, sql: &str, expected: Vec<Token>) {
234+
if self.dialects.is_empty() {
235+
panic!("No dialects to test");
236+
}
237+
238+
self.dialects.iter().for_each(|dialect| {
239+
let mut tokenizer = Tokenizer::new(&**dialect, sql);
240+
if let Some(options) = &self.options {
241+
tokenizer = tokenizer.with_unescape(options.unescape);
242+
}
243+
let tokens = tokenizer.tokenize().unwrap();
244+
assert_eq!(expected, tokens, "Tokenized differently for {:?}", dialect);
245+
});
246+
}
247+
}
248+
249+
/// Returns all available dialects.
250+
pub fn all_dialects() -> TestedDialects {
251+
TestedDialects::new(vec![
252+
Box::new(GenericDialect {}),
253+
Box::new(PostgreSqlDialect {}),
254+
Box::new(MsSqlDialect {}),
255+
Box::new(AnsiDialect {}),
256+
Box::new(SnowflakeDialect {}),
257+
Box::new(HiveDialect {}),
258+
Box::new(RedshiftSqlDialect {}),
259+
Box::new(MySqlDialect {}),
260+
Box::new(BigQueryDialect {}),
261+
Box::new(SQLiteDialect {}),
262+
Box::new(DuckDbDialect {}),
263+
Box::new(DatabricksDialect {}),
264+
Box::new(ClickHouseDialect {}),
265+
])
266+
}
267+
268+
/// Returns all dialects matching the given predicate.
269+
pub fn all_dialects_where<F>(predicate: F) -> TestedDialects
270+
where
271+
F: Fn(&dyn Dialect) -> bool,
272+
{
273+
let mut dialects = all_dialects();
274+
dialects.dialects.retain(|d| predicate(&**d));
275+
dialects
276+
}
277+
278+
/// Returns available dialects. The `except` predicate is used
279+
/// to filter out specific dialects.
280+
pub fn all_dialects_except<F>(except: F) -> TestedDialects
281+
where
282+
F: Fn(&dyn Dialect) -> bool,
283+
{
284+
all_dialects_where(|d| !except(d))
285+
}

0 commit comments

Comments
 (0)