Skip to content

feat: implement clickhouse-user-query #2554

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
331 changes: 316 additions & 15 deletions Cargo.lock

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions packages/common/clickhouse-user-query/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[package]
name = "clickhouse-user-query"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true

[dependencies]
clickhouse = "0.12"
thiserror = "1.0"
serde = { version = "1.0", features = ["derive"] }

[dev-dependencies]
serde_json = "1.0"
testcontainers = "0.24"
tokio = { version = "1.0", features = ["full"] }
207 changes: 207 additions & 0 deletions packages/common/clickhouse-user-query/src/builder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
use clickhouse::query::Query;
use clickhouse::sql::Identifier;
use serde::{Deserialize, Serialize};

use crate::error::{Result, UserQueryError};
use crate::query::QueryExpr;
use crate::schema::{PropertyType, Schema};

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserDefinedQueryBuilder {
where_clause: String,
bind_values: Vec<BindValue>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
enum BindValue {
Bool(bool),
String(String),
Number(f64),
ArrayString(Vec<String>),
}

impl UserDefinedQueryBuilder {
pub fn new(schema: &Schema, expr: &QueryExpr) -> Result<Self> {
let mut builder = QueryBuilder::new(schema);
let where_clause = builder.build_where_clause(expr)?;

if where_clause.trim().is_empty() {
return Err(UserQueryError::EmptyQuery);
}

Ok(Self {
where_clause,
bind_values: builder.bind_values,
})
}

pub fn bind_to(&self, mut query: Query) -> Query {
for bind_value in &self.bind_values {
query = match bind_value {
BindValue::Bool(v) => query.bind(*v),
BindValue::String(v) => query.bind(v),
BindValue::Number(v) => query.bind(*v),
BindValue::ArrayString(v) => query.bind(v),
};
}
query
}

pub fn where_expr(&self) -> &str {
&self.where_clause
}
}

struct QueryBuilder<'a> {
schema: &'a Schema,
bind_values: Vec<BindValue>,
}

impl<'a> QueryBuilder<'a> {
fn new(schema: &'a Schema) -> Self {
Self {
schema,
bind_values: Vec::new(),
}
}

fn build_where_clause(&mut self, expr: &QueryExpr) -> Result<String> {
match expr {
QueryExpr::And { exprs } => {
if exprs.is_empty() {
return Err(UserQueryError::EmptyQuery);
}
let clauses: Result<Vec<_>> = exprs
.iter()
.map(|e| self.build_where_clause(e))
.collect();
Ok(format!("({})", clauses?.join(" AND ")))
}
QueryExpr::Or { exprs } => {
if exprs.is_empty() {
return Err(UserQueryError::EmptyQuery);
}
let clauses: Result<Vec<_>> = exprs
.iter()
.map(|e| self.build_where_clause(e))
.collect();
Ok(format!("({})", clauses?.join(" OR ")))
}
QueryExpr::BoolEqual { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::Bool)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::Bool(*value));
Ok(format!("{} = ?", column))
}
QueryExpr::BoolNotEqual { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::Bool)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::Bool(*value));
Ok(format!("{} != ?", column))
}
QueryExpr::StringEqual { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::String)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::String(value.clone()));
Ok(format!("{} = ?", column))
}
QueryExpr::StringNotEqual { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::String)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::String(value.clone()));
Ok(format!("{} != ?", column))
}
QueryExpr::ArrayContains { property, subproperty, values } => {
if values.is_empty() {
return Err(UserQueryError::EmptyArrayValues("ArrayContains".to_string()));
}
self.validate_property_access(property, subproperty, &PropertyType::ArrayString)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::ArrayString(values.clone()));
Ok(format!("hasAny({}, ?)", column))
}
QueryExpr::ArrayDoesNotContain { property, subproperty, values } => {
if values.is_empty() {
return Err(UserQueryError::EmptyArrayValues("ArrayDoesNotContain".to_string()));
}
self.validate_property_access(property, subproperty, &PropertyType::ArrayString)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::ArrayString(values.clone()));
Ok(format!("NOT hasAny({}, ?)", column))
}
QueryExpr::NumberEqual { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::Number)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::Number(*value));
Ok(format!("{} = ?", column))
}
QueryExpr::NumberNotEqual { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::Number)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::Number(*value));
Ok(format!("{} != ?", column))
}
QueryExpr::NumberLess { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::Number)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::Number(*value));
Ok(format!("{} < ?", column))
}
QueryExpr::NumberLessOrEqual { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::Number)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::Number(*value));
Ok(format!("{} <= ?", column))
}
QueryExpr::NumberGreater { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::Number)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::Number(*value));
Ok(format!("{} > ?", column))
}
QueryExpr::NumberGreaterOrEqual { property, subproperty, value } => {
self.validate_property_access(property, subproperty, &PropertyType::Number)?;
let column = self.build_column_reference(property, subproperty)?;
self.bind_values.push(BindValue::Number(*value));
Ok(format!("{} >= ?", column))
}
}
}

fn validate_property_access(
&self,
property: &str,
subproperty: &Option<String>,
expected_type: &PropertyType,
) -> Result<()> {
let prop = self.schema.get_property(property)
.ok_or_else(|| UserQueryError::PropertyNotFound(property.to_string()))?;

if subproperty.is_some() && !prop.supports_subproperties {
return Err(UserQueryError::SubpropertiesNotSupported(property.to_string()));
}

if &prop.ty != expected_type {
return Err(UserQueryError::PropertyTypeMismatch(
property.to_string(),
expected_type.type_name().to_string(),
prop.ty.type_name().to_string(),
));
}

Ok(())
}

fn build_column_reference(&self, property: &str, subproperty: &Option<String>) -> Result<String> {
let property_ident = Identifier(property);

match subproperty {
Some(subprop) => {
// For ClickHouse Map access, use string literal syntax
Ok(format!("{}[{}]", property_ident.0, format!("'{}'", subprop.replace("'", "\\'"))))
}
None => Ok(property_ident.0.to_string()),
Comment on lines +196 to +203
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Identifier struct is created but then its inner field .0 is accessed directly, bypassing any SQL escaping that the Identifier type would provide. This creates a potential SQL injection vulnerability. Consider using the Identifier's proper display/formatting implementation or a dedicated escaping method instead of direct field access. This would ensure property names are properly escaped in the generated SQL.

Suggested change
let property_ident = Identifier(property);
match subproperty {
Some(subprop) => {
// For ClickHouse Map access, use string literal syntax
Ok(format!("{}[{}]", property_ident.0, format!("'{}'", subprop.replace("'", "\\'"))))
}
None => Ok(property_ident.0.to_string()),
let property_ident = Identifier(property);
match subproperty {
Some(subprop) => {
// For ClickHouse Map access, use string literal syntax
Ok(format!("{}[{}]", property_ident, format!("'{}'", subprop.replace("'", "\\'"))))
}
None => Ok(property_ident.to_string()),

Spotted by Diamond

Is this helpful? React 👍 or 👎 to let us know.

}
}
}

24 changes: 24 additions & 0 deletions packages/common/clickhouse-user-query/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use thiserror::Error;

#[derive(Error, Debug)]
pub enum UserQueryError {
#[error("Property '{0}' not found in schema")]
PropertyNotFound(String),

#[error("Property '{0}' does not support subproperties")]
SubpropertiesNotSupported(String),

#[error("Property '{0}' type mismatch: expected {1}, got {2}")]
PropertyTypeMismatch(String, String, String),

#[error("Invalid property or subproperty name '{0}': must contain only alphanumeric characters and underscores")]
InvalidPropertyName(String),

#[error("Empty query expression")]
EmptyQuery,

#[error("Empty array values in {0} operation")]
EmptyArrayValues(String),
}

pub type Result<T> = std::result::Result<T, UserQueryError>;
60 changes: 60 additions & 0 deletions packages/common/clickhouse-user-query/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//! Safe ClickHouse user-defined query builder
//!
//! This crate provides a safe way to build ClickHouse queries from user-defined expressions
//! while protecting against SQL injection attacks. All user inputs are properly validated
//! and bound using parameterized queries.
//!
//! # Example
//!
//! ```rust
//! use clickhouse_user_query::*;
//!
//! // Define the schema of allowed properties
//! let schema = Schema::new(vec![
//! Property::new("user_id".to_string(), false, PropertyType::String).unwrap(),
//! Property::new("metadata".to_string(), true, PropertyType::String).unwrap(),
//! Property::new("active".to_string(), false, PropertyType::Bool).unwrap(),
//! Property::new("tags".to_string(), false, PropertyType::ArrayString).unwrap(),
//! ]).unwrap();
//!
//! // Build a complex query expression
//! let query_expr = QueryExpr::And {
//! exprs: vec![
//! QueryExpr::StringEqual {
//! property: "user_id".to_string(),
//! subproperty: None,
//! value: "12345".to_string(),
//! },
//! QueryExpr::BoolEqual {
//! property: "active".to_string(),
//! subproperty: None,
//! value: true,
//! },
//! QueryExpr::ArrayContains {
//! property: "tags".to_string(),
//! subproperty: None,
//! values: vec!["premium".to_string(), "verified".to_string()],
//! },
//! ],
//! };
//!
//! // Create the safe query builder
//! let builder = UserDefinedQueryBuilder::new(&schema, &query_expr).unwrap();
//!
//! // Use with ClickHouse client (commented out since clickhouse client not available in tests)
//! // let query = clickhouse::Client::default()
//! // .query("SELECT * FROM users WHERE ?")
//! // .bind(builder.where_expr());
//! // let final_query = builder.bind_to(query);
//! ```

// Re-export all public types for convenience
pub use builder::UserDefinedQueryBuilder;
pub use error::{Result, UserQueryError};
pub use query::QueryExpr;
pub use schema::{Property, PropertyType, Schema};

pub mod builder;
pub mod error;
pub mod query;
pub mod schema;
73 changes: 73 additions & 0 deletions packages/common/clickhouse-user-query/src/query.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum QueryExpr {
And {
exprs: Vec<QueryExpr>,
},
Or {
exprs: Vec<QueryExpr>,
},
BoolEqual {
property: String,
subproperty: Option<String>,
value: bool,
},
BoolNotEqual {
property: String,
subproperty: Option<String>,
value: bool,
},
StringEqual {
property: String,
subproperty: Option<String>,
value: String,
},
StringNotEqual {
property: String,
subproperty: Option<String>,
value: String,
},
ArrayContains {
property: String,
subproperty: Option<String>,
values: Vec<String>,
},
ArrayDoesNotContain {
property: String,
subproperty: Option<String>,
values: Vec<String>,
},
NumberEqual {
property: String,
subproperty: Option<String>,
value: f64,
},
NumberNotEqual {
property: String,
subproperty: Option<String>,
value: f64,
},
NumberLess {
property: String,
subproperty: Option<String>,
value: f64,
},
NumberLessOrEqual {
property: String,
subproperty: Option<String>,
value: f64,
},
NumberGreater {
property: String,
subproperty: Option<String>,
value: f64,
},
NumberGreaterOrEqual {
property: String,
subproperty: Option<String>,
value: f64,
},
}

Loading
Loading