Skip to content

Commit 2be38ef

Browse files
committed
refactor doris
1 parent 380a969 commit 2be38ef

File tree

4 files changed

+127
-53
lines changed

4 files changed

+127
-53
lines changed

sqlmesh/core/engine_adapter/base.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2555,22 +2555,23 @@ def _get_data_objects(
25552555
"""
25562556
raise NotImplementedError()
25572557

2558+
def _get_temp_table_name(self, table: TableName) -> str:
    """Return the bare identifier to use for a temp table derived from ``table``.

    The result is ``__temp_<table name>_<short random id>``; only the table's
    own name is used, not its catalog or schema. Engine adapters can override
    this to change the naming scheme.
    """
    base_name = exp.to_table(table).name
    unique_suffix = random_id(short=True)
    return f"__temp_{base_name}_{unique_suffix}"
2561+
25582562
def _get_temp_table(
25592563
self,
25602564
table: TableName,
25612565
table_only: bool = False,
25622566
quoted: bool = True,
2563-
start_with: str = "__",
25642567
) -> exp.Table:
25652568
"""
25662569
Returns the name of the temp table that should be used for the given table name.
25672570
"""
25682571
table = t.cast(exp.Table, exp.to_table(table).copy())
25692572
table.set(
25702573
"this",
2571-
exp.to_identifier(
2572-
f"{start_with}temp_{table.name}_{random_id(short=True)}", quoted=quoted
2573-
),
2574+
exp.to_identifier(self._get_temp_table_name(table), quoted=quoted),
25742575
)
25752576

25762577
if table_only:

sqlmesh/core/engine_adapter/doris.py

Lines changed: 96 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
set_catalog,
2222
)
2323
from sqlmesh.core.schema_diff import SchemaDiffer
24+
from sqlmesh.utils import random_id
2425
from sqlmesh.utils.errors import (
2526
SQLMeshError,
2627
)
@@ -34,7 +35,9 @@
3435

3536

3637
@set_catalog()
37-
class DorisEngineAdapter(LogicalMergeMixin, PandasNativeFetchDFSupportMixin, NonTransactionalTruncateMixin):
38+
class DorisEngineAdapter(
39+
LogicalMergeMixin, PandasNativeFetchDFSupportMixin, NonTransactionalTruncateMixin
40+
):
3841
DIALECT = "doris"
3942
DEFAULT_BATCH_SIZE = 200
4043
SUPPORTS_TRANSACTIONS = False # Doris doesn't support transactions
@@ -224,7 +227,9 @@ def _create_materialized_view(
224227
import pandas as pd
225228

226229
if isinstance(query_or_df, pd.DataFrame):
227-
values: t.List[t.Tuple[t.Any, ...]] = list(query_or_df.itertuples(index=False, name=None))
230+
values: t.List[t.Tuple[t.Any, ...]] = list(
231+
query_or_df.itertuples(index=False, name=None)
232+
)
228233
target_columns_to_types, source_columns = self._columns_to_types(
229234
query_or_df, target_columns_to_types, source_columns
230235
)
@@ -304,12 +309,16 @@ def _create_materialized_view(
304309
doris_inline_clauses.append(f"BUILD {build_value}")
305310
refresh = view_properties.get("refresh")
306311
if refresh is not None:
307-
refresh_value = refresh.this if isinstance(refresh, exp.Literal) else str(refresh)
312+
refresh_value = (
313+
refresh.this if isinstance(refresh, exp.Literal) else str(refresh)
314+
)
308315
doris_inline_clauses.append(f"REFRESH {refresh_value}")
309316
refresh_trigger = view_properties.get("refresh_trigger")
310317
if refresh_trigger is not None:
311318
refresh_trigger_value = (
312-
refresh_trigger.this if isinstance(refresh_trigger, exp.Literal) else str(refresh_trigger)
319+
refresh_trigger.this
320+
if isinstance(refresh_trigger, exp.Literal)
321+
else str(refresh_trigger)
313322
)
314323
doris_inline_clauses.append(str(refresh_trigger_value))
315324
# KEY / DUPLICATE KEY clause
@@ -339,7 +348,9 @@ def _create_materialized_view(
339348
doris_inline_clauses.append(f"DUPLICATE KEY ({', '.join(cols)})")
340349
# COMMENT clause
341350
if table_description:
342-
doris_inline_clauses.append(f"COMMENT '{self._truncate_table_comment(table_description)}'")
351+
doris_inline_clauses.append(
352+
f"COMMENT '{self._truncate_table_comment(table_description)}'"
353+
)
343354
# PARTITION BY (inline for Doris MV)
344355
if partitioned_by:
345356
part_cols = ", ".join(
@@ -349,7 +360,9 @@ def _create_materialized_view(
349360
if isinstance(col, exp.Expression)
350361
else exp.to_column(col).sql(dialect=self.dialect, identify=True)
351362
)
352-
for col in (partitioned_by if isinstance(partitioned_by, list) else [partitioned_by])
363+
for col in (
364+
partitioned_by if isinstance(partitioned_by, list) else [partitioned_by]
365+
)
353366
]
354367
)
355368
doris_inline_clauses.append(f"PARTITION BY ({part_cols})")
@@ -398,7 +411,9 @@ def drop_view(
398411
# Remove cascade from kwargs as Doris doesn't support it
399412
if materialized and kwargs.get("view_properties"):
400413
view_properties = kwargs.pop("view_properties")
401-
if view_properties.get("materialized_type") == "SYNC" and view_properties.get("source_table"):
414+
if view_properties.get("materialized_type") == "SYNC" and view_properties.get(
415+
"source_table"
416+
):
402417
# Format the source table name properly for Doris
403418
source_table = view_properties.get("source_table")
404419
if isinstance(source_table, exp.Table):
@@ -432,18 +447,24 @@ def create_table_like(
432447
)
433448
)
434449

435-
def _create_table_comment(self, table_name: TableName, table_comment: str, table_kind: str = "TABLE") -> None:
450+
def _create_table_comment(
    self, table_name: TableName, table_comment: str, table_kind: str = "TABLE"
) -> None:
    """Set the comment on an existing table with ``ALTER TABLE ... MODIFY COMMENT``.

    Args:
        table_name: The table whose comment is being set.
        table_comment: The comment text. It is truncated with
            ``_truncate_table_comment`` before being embedded in the statement.
        table_kind: Not used by this implementation.
    """
    table_sql = exp.to_table(table_name).sql(dialect=self.dialect, identify=True)
    comment = self._truncate_table_comment(table_comment)
    # The comment is embedded in a double-quoted SQL string literal, so
    # backslashes and double quotes must be escaped; otherwise a comment
    # containing either would terminate the literal early and break the statement.
    escaped_comment = comment.replace("\\", "\\\\").replace('"', '\\"')

    self.execute(f'ALTER TABLE {table_sql} MODIFY COMMENT "{escaped_comment}"')
439458

440459
def _build_create_comment_column_exp(
    self, table: exp.Table, column_name: str, column_comment: str, table_kind: str = "TABLE"
) -> exp.Comment | str:
    """Build the statement that sets a column comment.

    Args:
        table: The table that owns the column.
        column_name: The column whose comment is being set.
        column_comment: The comment text. It is truncated with
            ``_truncate_column_comment`` before being embedded in the statement.
        table_kind: Not used by this implementation.

    Returns:
        The ``ALTER TABLE ... MODIFY COLUMN ... COMMENT`` statement as a raw SQL string.
    """
    table_sql = table.sql(dialect=self.dialect, identify=True)
    comment = self._truncate_column_comment(column_comment)
    # The comment is embedded in a double-quoted SQL string literal, so
    # backslashes and double quotes must be escaped; otherwise a comment
    # containing either would terminate the literal early and break the statement.
    escaped_comment = comment.replace("\\", "\\\\").replace('"', '\\"')
    # NOTE(review): column_name is interpolated as-is (unquoted), unlike the table
    # name which is rendered with identify=True -- confirm callers never pass names
    # that require quoting.
    return f'ALTER TABLE {table_sql} MODIFY COLUMN {column_name} COMMENT "{escaped_comment}"'
445464

446-
def delete_from(self, table_name: TableName, where: t.Optional[t.Union[str, exp.Expression]] = None) -> None:
465+
def delete_from(
466+
self, table_name: TableName, where: t.Optional[t.Union[str, exp.Expression]] = None
467+
) -> None:
447468
"""
448469
Delete from a table.
449470
@@ -491,7 +512,11 @@ def _find_subquery_in_condition(
491512

492513
def _is_subquery_expression(self, expr: exp.Expression) -> bool:
    """Check whether ``expr`` carries a subquery in its ``query`` argument.

    Args:
        expr: The expression to inspect.

    Returns:
        True if ``expr.args["query"]`` exists and is an ``exp.Subquery``,
        otherwise False. The result is always a real ``bool``; a missing or
        ``None`` ``query`` argument yields False rather than a falsy non-bool.
    """
    return isinstance(expr.args.get("query"), exp.Subquery)
495520

496521
def _execute_delete_with_subquery(
497522
self, table_name: TableName, subquery_info: t.Tuple[exp.Expression, exp.Expression, bool]
@@ -561,7 +586,9 @@ def _create_table_from_columns(
561586
# Convert primary_key to unique_key for Doris (Doris doesn't support primary keys)
562587
if primary_key and "unique_key" not in table_properties:
563588
# Represent as a Tuple of columns to match downstream handling
564-
table_properties["unique_key"] = exp.Tuple(expressions=[exp.to_column(col) for col in primary_key])
589+
table_properties["unique_key"] = exp.Tuple(
590+
expressions=[exp.to_column(col) for col in primary_key]
591+
)
565592

566593
# Update kwargs with the modified table_properties
567594
kwargs["table_properties"] = table_properties
@@ -606,7 +633,9 @@ def to_raw_sql(expr: t.Union[exp.Literal, exp.Var, str, t.Any]) -> exp.Var:
606633
if partitions:
607634
if isinstance(partitions, exp.Tuple):
608635
create_expressions = [
609-
exp.Var(this=e.this, quoted=False) if isinstance(e, exp.Literal) else to_raw_sql(e)
636+
exp.Var(this=e.this, quoted=False)
637+
if isinstance(e, exp.Literal)
638+
else to_raw_sql(e)
610639
for e in partitions.expressions
611640
]
612641
elif isinstance(partitions, exp.Literal):
@@ -645,13 +674,19 @@ def _build_table_properties_exp(
645674
# Extract column names from Tuple expressions
646675
column_names = []
647676
for expr in unique_key.expressions:
648-
if isinstance(expr, exp.Column) and hasattr(expr, "this") and hasattr(expr.this, "this"):
677+
if (
678+
isinstance(expr, exp.Column)
679+
and hasattr(expr, "this")
680+
and hasattr(expr.this, "this")
681+
):
649682
column_names.append(str(expr.this.this))
650683
elif hasattr(expr, "this"):
651684
column_names.append(str(expr.this))
652685
else:
653686
column_names.append(str(expr))
654-
properties.append(exp.UniqueKeyProperty(expressions=[exp.to_column(k) for k in column_names]))
687+
properties.append(
688+
exp.UniqueKeyProperty(expressions=[exp.to_column(k) for k in column_names])
689+
)
655690
elif isinstance(unique_key, exp.Column):
656691
# Handle as single column
657692
if hasattr(unique_key, "this") and hasattr(unique_key.this, "this"):
@@ -669,26 +704,38 @@ def _build_table_properties_exp(
669704
# Extract column names from Tuple expressions
670705
column_names = []
671706
for expr in duplicate_key.expressions:
672-
if isinstance(expr, exp.Column) and hasattr(expr, "this") and hasattr(expr.this, "this"):
707+
if (
708+
isinstance(expr, exp.Column)
709+
and hasattr(expr, "this")
710+
and hasattr(expr.this, "this")
711+
):
673712
column_names.append(str(expr.this.this))
674713
elif hasattr(expr, "this"):
675714
column_names.append(str(expr.this))
676715
else:
677716
column_names.append(str(expr))
678-
properties.append(exp.DuplicateKeyProperty(expressions=[exp.to_column(k) for k in column_names]))
717+
properties.append(
718+
exp.DuplicateKeyProperty(expressions=[exp.to_column(k) for k in column_names])
719+
)
679720
elif isinstance(duplicate_key, exp.Column):
680721
# Handle as single column
681722
if hasattr(duplicate_key, "this") and hasattr(duplicate_key.this, "this"):
682723
column_name = str(duplicate_key.this.this)
683724
else:
684725
column_name = str(duplicate_key.this)
685-
properties.append(exp.DuplicateKeyProperty(expressions=[exp.to_column(column_name)]))
726+
properties.append(
727+
exp.DuplicateKeyProperty(expressions=[exp.to_column(column_name)])
728+
)
686729
elif isinstance(duplicate_key, str):
687-
properties.append(exp.DuplicateKeyProperty(expressions=[exp.to_column(duplicate_key)]))
730+
properties.append(
731+
exp.DuplicateKeyProperty(expressions=[exp.to_column(duplicate_key)])
732+
)
688733

689734
if table_description:
690735
properties.append(
691-
exp.SchemaCommentProperty(this=exp.Literal.string(self._truncate_table_comment(table_description)))
736+
exp.SchemaCommentProperty(
737+
this=exp.Literal.string(self._truncate_table_comment(table_description))
738+
)
692739
)
693740

694741
# Handle partitioning
@@ -700,10 +747,14 @@ def _build_table_properties_exp(
700747
# Handle literal strings like "RANGE(col)" or "LIST(col)"
701748
if isinstance(expr, exp.Literal) and getattr(expr, "is_string", False):
702749
text = str(expr.this)
703-
match = re.match(r"^\s*(RANGE|LIST)\s*\((.*?)\)\s*$", text, flags=re.IGNORECASE)
750+
match = re.match(
751+
r"^\s*(RANGE|LIST)\s*\((.*?)\)\s*$", text, flags=re.IGNORECASE
752+
)
704753
if match:
705754
inner = match.group(2)
706-
inner_cols = [c.strip().strip("`") for c in inner.split(",") if c.strip()]
755+
inner_cols = [
756+
c.strip().strip("`") for c in inner.split(",") if c.strip()
757+
]
707758
for col in inner_cols:
708759
normalized_partitioned_by.append(exp.to_column(col))
709760
continue
@@ -720,7 +771,11 @@ def _build_table_properties_exp(
720771
key_cols_set = set()
721772
if isinstance(unique_key, exp.Tuple):
722773
for expr in unique_key.expressions:
723-
if isinstance(expr, exp.Column) and hasattr(expr, "this") and hasattr(expr.this, "this"):
774+
if (
775+
isinstance(expr, exp.Column)
776+
and hasattr(expr, "this")
777+
and hasattr(expr.this, "this")
778+
):
724779
key_cols_set.add(str(expr.this.this))
725780
elif hasattr(expr, "this"):
726781
key_cols_set.add(str(expr.this))
@@ -801,12 +856,16 @@ def _build_table_properties_exp(
801856
elif isinstance(expr.expression, exp.Array):
802857
# Handle expressions array
803858
distributed_info[key] = [
804-
str(e.this) for e in expr.expression.expressions if hasattr(e, "this")
859+
str(e.this)
860+
for e in expr.expression.expressions
861+
if hasattr(e, "this")
805862
]
806863
elif isinstance(expr.expression, exp.Tuple):
807864
# Handle expressions tuple (array of strings)
808865
distributed_info[key] = [
809-
str(e.this) for e in expr.expression.expressions if hasattr(e, "this")
866+
str(e.this)
867+
for e in expr.expression.expressions
868+
if hasattr(e, "this")
810869
]
811870
else:
812871
distributed_info[key] = str(expr.expression)
@@ -859,13 +918,19 @@ def _build_table_properties_exp(
859918
)
860919
properties.append(prop)
861920
else:
862-
unique_key_property = next((prop for prop in properties if isinstance(prop, exp.UniqueKeyProperty)), None)
921+
unique_key_property = next(
922+
(prop for prop in properties if isinstance(prop, exp.UniqueKeyProperty)), None
923+
)
863924
if unique_key_property:
864925
# Use the first column from unique_key as the distribution key
865926
if unique_key_property.expressions:
866927
first_col = unique_key_property.expressions[0]
867-
column_name = str(first_col.this) if hasattr(first_col, "this") else str(first_col)
868-
logger.info(f"[Doris] Adding default distributed_by using unique_key column: {column_name}")
928+
column_name = (
929+
str(first_col.this) if hasattr(first_col, "this") else str(first_col)
930+
)
931+
logger.info(
932+
f"[Doris] Adding default distributed_by using unique_key column: {column_name}"
933+
)
869934
properties.append(
870935
exp.DistributedByProperty(
871936
expressions=[exp.to_column(column_name)],
@@ -882,13 +947,9 @@ def _build_table_properties_exp(
882947
return exp.Properties(expressions=properties)
883948
return None
884949

885-
def _get_temp_table(
886-
self, table: TableName, table_only: bool = False, quoted: bool = True, start_with: str = "__"
887-
) -> exp.Table:
888-
"""
889-
Returns the name of the temp table that should be used for the given table name.
890-
"""
891-
return super()._get_temp_table(table, table_only, quoted, start_with="")
950+
def _get_temp_table_name(self, table: TableName) -> str:
    """Return the bare identifier to use for a temp table derived from ``table``.

    The result is ``temp_<table name>_<short random id>``, i.e. without a
    leading ``__``.
    NOTE(review): presumably the prefix is dropped because Doris rejects
    identifiers that start with underscores -- confirm.
    """
    base_name = exp.to_table(table).name
    unique_suffix = random_id(short=True)
    return f"temp_{base_name}_{unique_suffix}"
892953

893954
def _properties_to_expressions(self, properties: t.Dict[str, t.Any]) -> t.List[exp.Expression]:
894955
"""Convert a dictionary of properties to a list of exp.Property expressions."""

0 commit comments

Comments
 (0)