Skip to content

Add support for Snowflake identifier function #1929

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,12 +344,14 @@ impl fmt::Display for ObjectName {
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum ObjectNamePart {
/// A plain, possibly quoted, identifier such as `foo` or `"table"`.
Identifier(Ident),
/// A function call that produces the identifier dynamically, e.g.
/// Snowflake's `IDENTIFIER('name')`.
Function(ObjectNamePartFunction),
}

impl ObjectNamePart {
pub fn as_ident(&self) -> Option<&Ident> {
match self {
ObjectNamePart::Identifier(ident) => Some(ident),
ObjectNamePart::Function(_) => None,
}
}
}
Expand All @@ -358,10 +360,30 @@ impl fmt::Display for ObjectNamePart {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
ObjectNamePart::Identifier(ident) => write!(f, "{ident}"),
ObjectNamePart::Function(func) => write!(f, "{func}"),
}
}
}

/// An object name part that consists of a function that dynamically
/// constructs identifiers.
///
/// Displayed as the function name followed by its comma-separated
/// arguments in parentheses.
///
/// - [Snowflake](https://docs.snowflake.com/en/sql-reference/identifier-literal)
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub struct ObjectNamePartFunction {
/// Name of the identifier-generating function, e.g. `IDENTIFIER`.
pub name: Ident,
/// Arguments passed to the function.
pub args: Vec<FunctionArg>,
}

impl fmt::Display for ObjectNamePartFunction {
    /// Renders the part as `name(args)`, with the arguments comma-separated.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let args = display_comma_separated(&self.args);
        write!(f, "{}({})", self.name, args)
    }
}

/// Represents an Array Expression, either
/// `ARRAY[..]`, or `[..]`
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
Expand Down
4 changes: 4 additions & 0 deletions src/ast/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1671,6 +1671,10 @@ impl Spanned for ObjectNamePart {
/// Returns the source span of this part.
///
/// For a function part this is the union of the function name's span and
/// the spans of all of its arguments.
fn span(&self) -> Span {
    match self {
        ObjectNamePart::Identifier(ident) => ident.span,
        ObjectNamePart::Function(func) => {
            let args_span = union_spans(func.args.iter().map(|arg| arg.span()));
            func.name.span.union(&args_span)
        }
    }
}
}
Expand Down
11 changes: 10 additions & 1 deletion src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pub use self::postgresql::PostgreSqlDialect;
pub use self::redshift::RedshiftSqlDialect;
pub use self::snowflake::SnowflakeDialect;
pub use self::sqlite::SQLiteDialect;
use crate::ast::{ColumnOption, Expr, GranteesType, Statement};
use crate::ast::{ColumnOption, Expr, GranteesType, Ident, Statement};
pub use crate::keywords;
use crate::keywords::Keyword;
use crate::parser::{Parser, ParserError};
Expand Down Expand Up @@ -1076,6 +1076,15 @@ pub trait Dialect: Debug + Any {
fn supports_comma_separated_drop_column_list(&self) -> bool {
false
}

/// Returns true if the dialect considers the specified ident as a function
/// that returns an identifier. Typically used to generate identifiers
/// programmatically.
///
/// The default implementation returns `false` for every identifier;
/// dialects that support such functions override it.
///
/// - [Snowflake](https://docs.snowflake.com/en/sql-reference/identifier-literal)
fn is_identifier_generating_function_name(&self, _ident: &Ident) -> bool {
false
}
}

/// This represents the operators for which precedence must be defined
Expand Down
9 changes: 9 additions & 0 deletions src/dialect/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,15 @@ impl Dialect for SnowflakeDialect {
fn supports_comma_separated_drop_column_list(&self) -> bool {
true
}

/// Recognizes the unquoted, case-insensitive name `IDENTIFIER` as an
/// identifier-generating function.
///
/// A quoted name such as `"identifier"` is an ordinary identifier and is
/// therefore rejected.
fn is_identifier_generating_function_name(&self, ident: &Ident) -> bool {
    // Compare in place instead of allocating a lowercased copy of the name;
    // the target is pure ASCII, so an ASCII case-insensitive match suffices.
    ident.quote_style.is_none() && ident.value.eq_ignore_ascii_case("identifier")
}

// Enables the `<expr>.*` form in a select list for this dialect.
// For example: `SELECT IDENTIFIER('alias1').* FROM tbl AS alias1`
fn supports_select_expr_star(&self) -> bool {
true
}
}

fn parse_file_staging_command(kw: Keyword, parser: &mut Parser) -> Result<Statement, ParserError> {
Expand Down
126 changes: 78 additions & 48 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10353,70 +10353,84 @@ impl<'a> Parser<'a> {
}
}

/// Parse a possibly qualified, possibly quoted identifier, optionally allowing for wildcards,
/// Parse a possibly qualified, possibly quoted identifier, e.g.
/// `foo` or `myschema."table"`
///
/// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
/// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
/// in this context on BigQuery.
pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result<ObjectName, ParserError> {
self.parse_object_name_inner(in_table_clause, false)
}

/// Parse a possibly qualified, possibly quoted identifier, e.g.
/// `foo` or `myschema."table"`
///
/// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
/// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
/// in this context on BigQuery.
///
/// The `allow_wildcards` parameter indicates whether to allow for wildcards in the object name
/// e.g. *, *.*, `foo`.*, or "foo"."bar"
fn parse_object_name_with_wildcards(
fn parse_object_name_inner(
&mut self,
in_table_clause: bool,
allow_wildcards: bool,
) -> Result<ObjectName, ParserError> {
let mut idents = vec![];

let mut parts = vec![];
if dialect_of!(self is BigQueryDialect) && in_table_clause {
loop {
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
idents.push(ident);
parts.push(ObjectNamePart::Identifier(ident));
if !self.consume_token(&Token::Period) && !end_with_period {
break;
}
}
} else {
loop {
let ident = if allow_wildcards && self.peek_token().token == Token::Mul {
if allow_wildcards && self.peek_token().token == Token::Mul {
let span = self.next_token().span;
Ident {
parts.push(ObjectNamePart::Identifier(Ident {
value: Token::Mul.to_string(),
quote_style: None,
span,
}));
} else if let Some(func_part) =
self.maybe_parse(|parser| parser.parse_object_name_function_part())?
{
parts.push(ObjectNamePart::Function(func_part));
} else if dialect_of!(self is BigQueryDialect) && in_table_clause {
let (ident, end_with_period) = self.parse_unquoted_hyphenated_identifier()?;
parts.push(ObjectNamePart::Identifier(ident));
if !self.consume_token(&Token::Period) && !end_with_period {
break;
}
} else if self.dialect.supports_object_name_double_dot_notation()
&& parts.len() == 1
&& matches!(self.peek_token().token, Token::Period)
{
// Empty string here means default schema
parts.push(ObjectNamePart::Identifier(Ident::new("")));
} else {
if self.dialect.supports_object_name_double_dot_notation()
&& idents.len() == 1
&& self.consume_token(&Token::Period)
{
// Empty string here means default schema
idents.push(Ident::new(""));
}
self.parse_identifier()?
};
idents.push(ident);
let ident = self.parse_identifier()?;
parts.push(ObjectNamePart::Identifier(ident));
}

if !self.consume_token(&Token::Period) {
break;
}
}
}
Ok(ObjectName::from(idents))
}

/// Parse a possibly qualified, possibly quoted identifier, e.g.
/// `foo` or `myschema."table"`
///
/// The `in_table_clause` parameter indicates whether the object name is a table in a FROM, JOIN,
/// or similar table clause. Currently, this is used only to support unquoted hyphenated identifiers
/// in this context on BigQuery.
pub fn parse_object_name(&mut self, in_table_clause: bool) -> Result<ObjectName, ParserError> {
let ObjectName(mut idents) =
self.parse_object_name_with_wildcards(in_table_clause, false)?;

// BigQuery accepts any number of quoted identifiers of a table name.
// https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_identifiers
if dialect_of!(self is BigQueryDialect)
&& idents.iter().any(|part| {
&& parts.iter().any(|part| {
part.as_ident()
.is_some_and(|ident| ident.value.contains('.'))
})
{
idents = idents
parts = parts
.into_iter()
.flat_map(|part| match part.as_ident() {
Some(ident) => ident
Expand All @@ -10435,7 +10449,23 @@ impl<'a> Parser<'a> {
.collect()
}

Ok(ObjectName(idents))
Ok(ObjectName(parts))
}

/// Parses an object name part written as an identifier-generating function
/// call, i.e. `<name>(<args>)`.
///
/// The leading identifier must be accepted by the dialect's
/// `is_identifier_generating_function_name`; otherwise an "expected" error
/// is returned. The parenthesized argument list may be empty.
fn parse_object_name_function_part(&mut self) -> Result<ObjectNamePartFunction, ParserError> {
    let name = self.parse_identifier()?;

    // Bail out early when the identifier is not one of the dialect's
    // identifier-generating functions.
    if !self.dialect.is_identifier_generating_function_name(&name) {
        return self.expected(
            "dialect specific identifier-generating function",
            self.peek_token(),
        );
    }

    self.expect_token(&Token::LParen)?;
    let args = self.parse_comma_separated0(Self::parse_function_args, Token::RParen)?;
    self.expect_token(&Token::RParen)?;

    Ok(ObjectNamePartFunction { name, args })
}

/// Parse identifiers
Expand Down Expand Up @@ -13938,25 +13968,25 @@ impl<'a> Parser<'a> {
schemas: self.parse_comma_separated(|p| p.parse_object_name(false))?,
})
} else if self.parse_keywords(&[Keyword::RESOURCE, Keyword::MONITOR]) {
Some(GrantObjects::ResourceMonitors(self.parse_comma_separated(
|p| p.parse_object_name_with_wildcards(false, true),
)?))
Some(GrantObjects::ResourceMonitors(
self.parse_comma_separated(|p| p.parse_object_name(false))?,
))
} else if self.parse_keywords(&[Keyword::COMPUTE, Keyword::POOL]) {
Some(GrantObjects::ComputePools(self.parse_comma_separated(
|p| p.parse_object_name_with_wildcards(false, true),
)?))
Some(GrantObjects::ComputePools(
self.parse_comma_separated(|p| p.parse_object_name(false))?,
))
} else if self.parse_keywords(&[Keyword::FAILOVER, Keyword::GROUP]) {
Some(GrantObjects::FailoverGroup(self.parse_comma_separated(
|p| p.parse_object_name_with_wildcards(false, true),
)?))
Some(GrantObjects::FailoverGroup(
self.parse_comma_separated(|p| p.parse_object_name(false))?,
))
} else if self.parse_keywords(&[Keyword::REPLICATION, Keyword::GROUP]) {
Some(GrantObjects::ReplicationGroup(self.parse_comma_separated(
|p| p.parse_object_name_with_wildcards(false, true),
)?))
Some(GrantObjects::ReplicationGroup(
self.parse_comma_separated(|p| p.parse_object_name(false))?,
))
} else if self.parse_keywords(&[Keyword::EXTERNAL, Keyword::VOLUME]) {
Some(GrantObjects::ExternalVolumes(self.parse_comma_separated(
|p| p.parse_object_name_with_wildcards(false, true),
)?))
Some(GrantObjects::ExternalVolumes(
self.parse_comma_separated(|p| p.parse_object_name(false))?,
))
} else {
let object_type = self.parse_one_of_keywords(&[
Keyword::SEQUENCE,
Expand All @@ -13973,7 +14003,7 @@ impl<'a> Parser<'a> {
Keyword::CONNECTION,
]);
let objects =
self.parse_comma_separated(|p| p.parse_object_name_with_wildcards(false, true));
self.parse_comma_separated(|p| p.parse_object_name_inner(false, true));
match object_type {
Some(Keyword::DATABASE) => Some(GrantObjects::Databases(objects?)),
Some(Keyword::SCHEMA) => Some(GrantObjects::Schemas(objects?)),
Expand Down
6 changes: 5 additions & 1 deletion tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1232,14 +1232,18 @@ fn parse_select_expr_star() {
"SELECT 2. * 3 FROM T",
);
dialects.verified_only_select("SELECT myfunc().* FROM T");
dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T");

// Invalid
let res = dialects.parse_sql_statements("SELECT foo.*.* FROM T");
assert_eq!(
ParserError::ParserError("Expected: end of statement, found: .".to_string()),
res.unwrap_err()
);

let dialects = all_dialects_where(|d| {
d.supports_select_expr_star() && d.supports_select_wildcard_except()
});
dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T");
}

#[test]
Expand Down
Loading