Skip to content

Commit b6971e8

Browse files
committed
Extend implementation & testing for parsing without semicolons
- Introduce a `statements_without_semicolons_parse_to` test helper.
- For the implementation, `RETURN` parsing needs to be tightened up to avoid ambiguity, and tests that formerly asserted "end of statement" may now need to assert "an SQL statement".
- A new `assert_err_parse_statements` helper splits the dialects based on their semicolon requirements and asserts the expected error message accordingly.
1 parent c5e6ba5 commit b6971e8

File tree

7 files changed

+626
-130
lines changed

7 files changed

+626
-130
lines changed

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1122,6 +1122,11 @@ pub trait Dialect: Debug + Any {
11221122
) -> bool {
11231123
false
11241124
}
1125+
1126+
/// Returns true if the dialect supports parsing statements without a semicolon delimiter.
1127+
fn supports_statements_without_semicolon_delimiter(&self) -> bool {
1128+
false
1129+
}
11251130
}
11261131

11271132
/// This represents the operators for which precedence must be defined

src/dialect/mssql.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ impl Dialect for MsSqlDialect {
6767
}
6868

6969
fn supports_connect_by(&self) -> bool {
70-
true
70+
false
7171
}
7272

7373
fn supports_eq_alias_assignment(&self) -> bool {
@@ -280,6 +280,9 @@ impl MsSqlDialect {
280280
) -> Result<Vec<Statement>, ParserError> {
281281
let mut stmts = Vec::new();
282282
loop {
283+
while let Token::SemiColon = parser.peek_token_ref().token {
284+
parser.advance_token();
285+
}
283286
if let Token::EOF = parser.peek_token_ref().token {
284287
break;
285288
}

src/keywords.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,6 +1071,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[
10711071
Keyword::ANTI,
10721072
Keyword::SEMI,
10731073
Keyword::RETURNING,
1074+
Keyword::RETURN,
10741075
Keyword::ASOF,
10751076
Keyword::MATCH_CONDITION,
10761077
// for MSSQL-specific OUTER APPLY (seems reserved in most dialects)
@@ -1125,6 +1126,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[
11251126
Keyword::CLUSTER,
11261127
Keyword::DISTRIBUTE,
11271128
Keyword::RETURNING,
1129+
Keyword::RETURN,
11281130
// Reserved only as a column alias in the `SELECT` clause
11291131
Keyword::FROM,
11301132
Keyword::INTO,
@@ -1139,6 +1141,7 @@ pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[
11391141
Keyword::LIMIT,
11401142
Keyword::HAVING,
11411143
Keyword::WHERE,
1144+
Keyword::RETURN,
11421145
];
11431146

11441147
/// Global list of reserved keywords that cannot be parsed as identifiers
@@ -1149,4 +1152,5 @@ pub const RESERVED_FOR_IDENTIFIER: &[Keyword] = &[
11491152
Keyword::INTERVAL,
11501153
Keyword::STRUCT,
11511154
Keyword::TRIM,
1155+
Keyword::RETURN,
11521156
];

src/parser/mod.rs

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,22 @@ impl ParserOptions {
265265
self.unescape = unescape;
266266
self
267267
}
268+
269+
/// Set if semicolon statement delimiters are required.
270+
///
271+
/// If this option is `true`, the following SQL will not parse. If the option is `false`, the SQL will parse.
272+
///
273+
/// ```sql
274+
/// SELECT 1
275+
/// SELECT 2
276+
/// ```
277+
pub fn with_require_semicolon_stmt_delimiter(
278+
mut self,
279+
require_semicolon_stmt_delimiter: bool,
280+
) -> Self {
281+
self.require_semicolon_stmt_delimiter = require_semicolon_stmt_delimiter;
282+
self
283+
}
268284
}
269285

270286
#[derive(Copy, Clone)]
@@ -355,7 +371,11 @@ impl<'a> Parser<'a> {
355371
state: ParserState::Normal,
356372
dialect,
357373
recursion_counter: RecursionCounter::new(DEFAULT_REMAINING_DEPTH),
358-
options: ParserOptions::new().with_trailing_commas(dialect.supports_trailing_commas()),
374+
options: ParserOptions::new()
375+
.with_trailing_commas(dialect.supports_trailing_commas())
376+
.with_require_semicolon_stmt_delimiter(
377+
!dialect.supports_statements_without_semicolon_delimiter(),
378+
),
359379
}
360380
}
361381

@@ -478,10 +498,10 @@ impl<'a> Parser<'a> {
478498
match self.peek_token().token {
479499
Token::EOF => break,
480500

481-
// end of statement
482-
Token::Word(word) => {
483-
if expecting_statement_delimiter && word.keyword == Keyword::END {
484-
break;
501+
// don't expect a semicolon statement delimiter after a newline when not otherwise required
502+
Token::Whitespace(Whitespace::Newline) => {
503+
if !self.options.require_semicolon_stmt_delimiter {
504+
expecting_statement_delimiter = false;
485505
}
486506
}
487507
_ => {}
@@ -493,7 +513,7 @@ impl<'a> Parser<'a> {
493513

494514
let statement = self.parse_statement()?;
495515
stmts.push(statement);
496-
expecting_statement_delimiter = true;
516+
expecting_statement_delimiter = self.options.require_semicolon_stmt_delimiter;
497517
}
498518
Ok(stmts)
499519
}
@@ -4533,6 +4553,9 @@ impl<'a> Parser<'a> {
45334553
) -> Result<Vec<Statement>, ParserError> {
45344554
let mut values = vec![];
45354555
loop {
4556+
// ignore empty statements (between successive statement delimiters)
4557+
while self.consume_token(&Token::SemiColon) {}
4558+
45364559
match &self.peek_nth_token_ref(0).token {
45374560
Token::EOF => break,
45384561
Token::Word(w) => {
@@ -4544,7 +4567,13 @@ impl<'a> Parser<'a> {
45444567
}
45454568

45464569
values.push(self.parse_statement()?);
4547-
self.expect_token(&Token::SemiColon)?;
4570+
4571+
if self.options.require_semicolon_stmt_delimiter {
4572+
self.expect_token(&Token::SemiColon)?;
4573+
}
4574+
4575+
// ignore empty statements (between successive statement delimiters)
4576+
while self.consume_token(&Token::SemiColon) {}
45484577
}
45494578
Ok(values)
45504579
}
@@ -16370,7 +16399,28 @@ impl<'a> Parser<'a> {
1637016399

1637116400
/// Parse [Statement::Return]
1637216401
fn parse_return(&mut self) -> Result<Statement, ParserError> {
16373-
match self.maybe_parse(|p| p.parse_expr())? {
16402+
let rs = self.maybe_parse(|p| {
16403+
let expr = p.parse_expr()?;
16404+
16405+
match &expr {
16406+
Expr::Value(_)
16407+
| Expr::Function(_)
16408+
| Expr::UnaryOp { .. }
16409+
| Expr::BinaryOp { .. }
16410+
| Expr::Case { .. }
16411+
| Expr::Cast { .. }
16412+
| Expr::Convert { .. }
16413+
| Expr::Subquery(_) => Ok(expr),
16414+
// todo: how to restrict to variables?
16415+
Expr::Identifier(id) if id.value.starts_with('@') => Ok(expr),
16416+
_ => parser_err!(
16417+
"Non-returnable expression found following RETURN",
16418+
p.peek_token().span.start
16419+
),
16420+
}
16421+
})?;
16422+
16423+
match rs {
1637416424
Some(expr) => Ok(Statement::Return(ReturnStatement {
1637516425
value: Some(ReturnStatementValue::Expr(expr)),
1637616426
})),

src/test_utils.rs

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,37 @@ impl TestedDialects {
186186
statements
187187
}
188188

189+
/// The same as [`statements_parse_to`] but it will strip semicolons from the SQL text.
190+
pub fn statements_without_semicolons_parse_to(
191+
&self,
192+
sql: &str,
193+
canonical: &str,
194+
) -> Vec<Statement> {
195+
let sql_without_semicolons = sql
196+
.replace("; ", " ")
197+
.replace(" ;", " ")
198+
.replace(";\n", "\n")
199+
.replace("\n;", "\n")
200+
.replace(";", " ");
201+
let statements = self
202+
.parse_sql_statements(&sql_without_semicolons)
203+
.expect(&sql_without_semicolons);
204+
if !canonical.is_empty() && sql != canonical {
205+
assert_eq!(self.parse_sql_statements(canonical).unwrap(), statements);
206+
} else {
207+
assert_eq!(
208+
sql,
209+
statements
210+
.iter()
211+
// note: account for format_statement_list manually inserted semicolons
212+
.map(|s| s.to_string().trim_end_matches(";").to_string())
213+
.collect::<Vec<_>>()
214+
.join("; ")
215+
);
216+
}
217+
statements
218+
}
219+
189220
/// Ensures that `sql` parses as an [`Expr`], and that
190221
/// re-serializing the parse result produces canonical
191222
pub fn expr_parses_to(&self, sql: &str, canonical: &str) -> Expr {
@@ -318,6 +349,43 @@ where
318349
all_dialects_where(|d| !except(d))
319350
}
320351

352+
/// Returns all dialects that don't support statements without semicolon delimiters.
353+
/// (i.e. dialects that require semicolon delimiters.)
354+
pub fn all_dialects_requiring_semicolon_statement_delimiter() -> TestedDialects {
355+
let tested_dialects =
356+
all_dialects_except(|d| d.supports_statements_without_semicolon_delimiter());
357+
assert_ne!(tested_dialects.dialects.len(), 0);
358+
tested_dialects
359+
}
360+
361+
/// Returns all dialects that do support statements without semicolon delimiters.
362+
/// (i.e. dialects not requiring semicolon delimiters.)
363+
pub fn all_dialects_not_requiring_semicolon_statement_delimiter() -> TestedDialects {
364+
let tested_dialects =
365+
all_dialects_where(|d| d.supports_statements_without_semicolon_delimiter());
366+
assert_ne!(tested_dialects.dialects.len(), 0);
367+
tested_dialects
368+
}
369+
370+
/// Asserts an error for `parse_sql_statements`:
371+
/// - "end of statement" for dialects that require semicolon delimiters
372+
/// - "an SQL statement" for dialects that don't require semicolon delimiters.
373+
pub fn assert_err_parse_statements(sql: &str, found: &str) {
374+
assert_eq!(
375+
ParserError::ParserError(format!("Expected: end of statement, found: {}", found)),
376+
all_dialects_requiring_semicolon_statement_delimiter()
377+
.parse_sql_statements(sql)
378+
.unwrap_err()
379+
);
380+
381+
assert_eq!(
382+
ParserError::ParserError(format!("Expected: an SQL statement, found: {}", found)),
383+
all_dialects_not_requiring_semicolon_statement_delimiter()
384+
.parse_sql_statements(sql)
385+
.unwrap_err()
386+
);
387+
}
388+
321389
pub fn assert_eq_vec<T: ToString>(expected: &[&str], actual: &[T]) {
322390
assert_eq!(
323391
expected,

0 commit comments

Comments
 (0)