|
6 | 6 |
|
7 | 7 | from sqlglot import exp, parse_one
|
8 | 8 |
|
9 |
| -from sqlmesh.core.dialect import to_schema |
| 9 | +from sqlmesh.core.dialect import to_schema, transform_values |
10 | 10 |
|
11 | 11 | from sqlmesh.core.engine_adapter.mixins import (
|
12 | 12 | LogicalMergeMixin,
|
|
21 | 21 | set_catalog,
|
22 | 22 | )
|
23 | 23 | from sqlmesh.core.schema_diff import SchemaDiffer
|
24 |
| -from sqlmesh.utils import random_id |
| 24 | +from sqlmesh.utils import random_id, get_source_columns_to_types |
25 | 25 | from sqlmesh.utils.errors import (
|
26 | 26 | SQLMeshError,
|
27 | 27 | )
|
28 | 28 |
|
29 | 29 | if t.TYPE_CHECKING:
|
30 | 30 | from sqlmesh.core._typing import SchemaName, TableName
|
31 |
| - from sqlmesh.core.engine_adapter._typing import QueryOrDF |
| 31 | + from sqlmesh.core.engine_adapter._typing import QueryOrDF, Query |
32 | 32 | from sqlmesh.core.node import IntervalUnit
|
33 | 33 |
|
34 | 34 | logger = logging.getLogger(__name__)
|
@@ -136,10 +136,20 @@ def _get_data_objects(
|
136 | 136 | .where(exp.column("table_schema").eq(to_schema(schema_name).db))
|
137 | 137 | )
|
138 | 138 | if object_names:
|
139 |
| - query = query.where(exp.column("table_name").isin(*object_names)) |
| 139 | + # Doris may treat information_schema table_name comparisons as case-sensitive depending on settings. |
| 140 | + # Use LOWER(table_name) to match case-insensitively. |
| 141 | + lowered_names = [name.lower() for name in object_names] |
| 142 | + query = query.where(exp.func("LOWER", exp.column("table_name")).isin(*lowered_names)) |
140 | 143 |
|
141 | 144 | result = []
|
142 |
| - for schema_val, name_val, type_val in self.fetchall(query): |
| 145 | + rows = self.fetchall(query) |
| 146 | + logger.debug( |
| 147 | + "[Doris] _get_data_objects schema=%s object_names=%s -> %d rows", |
| 148 | + schema_name, |
| 149 | + list(object_names) if object_names else None, |
| 150 | + len(rows), |
| 151 | + ) |
| 152 | + for schema_val, name_val, type_val in rows: |
143 | 153 | try:
|
144 | 154 | schema = str(schema_val) if schema_val is not None else str(schema_name)
|
145 | 155 | name = str(name_val) if name_val is not None else "unknown"
|
@@ -396,6 +406,7 @@ def _create_materialized_view(
|
396 | 406 | insert_pos = 0
|
397 | 407 | create_sql = f"{create_sql[:insert_pos]} {insert_text}{create_sql[insert_pos:]}"
|
398 | 408 |
|
| 409 | + logger.debug("[Doris] CREATE MATERIALIZED VIEW SQL: %s", create_sql) |
399 | 410 | self.execute(create_sql)
|
400 | 411 |
|
401 | 412 | def drop_view(
|
@@ -559,6 +570,115 @@ def _build_column_condition(self, column: exp.Expression, is_not_in: bool) -> ex
|
559 | 570 | t2_col = exp.column(column.name, table="_t2")
|
560 | 571 | return t1_col.neq(t2_col) if is_not_in else t1_col.eq(t2_col)
|
561 | 572 |
|
| 573 | + def replace_query( |
| 574 | + self, |
| 575 | + table_name: "TableName", |
| 576 | + query_or_df: "QueryOrDF", |
| 577 | + target_columns_to_types: t.Optional[t.Dict[str, exp.DataType]] = None, |
| 578 | + table_description: t.Optional[str] = None, |
| 579 | + column_descriptions: t.Optional[t.Dict[str, str]] = None, |
| 580 | + source_columns: t.Optional[t.List[str]] = None, |
| 581 | + **kwargs: t.Any, |
| 582 | + ) -> None: |
| 583 | + """ |
| 584 | + Doris does not support REPLACE TABLE. Avoid CTAS on replace and always perform a |
| 585 | + delete+insert (or engine strategy) to ensure data is written even if the table exists. |
| 586 | + """ |
| 587 | + logger.debug( |
| 588 | + "[Doris] replace_query target=%s source_columns=%s", |
| 589 | + table_name, |
| 590 | + source_columns, |
| 591 | + ) |
| 592 | + target_table = exp.to_table(table_name) |
| 593 | + source_queries, inferred_columns_to_types = self._get_source_queries_and_columns_to_types( |
| 594 | + query_or_df, |
| 595 | + target_columns_to_types, |
| 596 | + target_table=target_table, |
| 597 | + source_columns=source_columns, |
| 598 | + ) |
| 599 | + target_columns_to_types = inferred_columns_to_types or self.columns(target_table) |
| 600 | + logger.debug( |
| 601 | + "[Doris] replace_query using %d source queries; columns=%s", |
| 602 | + len(source_queries), |
| 603 | + list(target_columns_to_types.keys()), |
| 604 | + ) |
| 605 | + # Use the standard insert-overwrite-by-condition path (DELETE/INSERT for Doris by default) |
| 606 | + return self._insert_overwrite_by_condition( |
| 607 | + target_table, |
| 608 | + source_queries, |
| 609 | + target_columns_to_types, |
| 610 | + ) |
| 611 | + |
    def _values_to_sql(
        self,
        values: t.List[t.Tuple[t.Any, ...]],
        target_columns_to_types: t.Dict[str, exp.DataType],
        batch_start: int,
        batch_end: int,
        alias: str = "t",
        source_columns: t.Optional[t.List[str]] = None,
    ) -> "Query":
        """
        Build a SELECT/UNION ALL subquery for a batch of literal rows.

        Doris (MySQL-compatible) doesn't reliably render SQLGlot's VALUES in FROM when using the
        'doris' dialect, which led to an empty `(SELECT)` subquery. To avoid that, construct a
        dialect-agnostic union of SELECT literals and then cast/order in an outer SELECT.

        Args:
            values: All literal rows; only the slice [batch_start:batch_end] is rendered.
            target_columns_to_types: Target table schema; drives the outer SELECT's
                column order and CAST types.
            batch_start: Start index (inclusive) of the batch within ``values``.
            batch_end: End index (exclusive) of the batch within ``values``.
            alias: Alias given to the inner subquery (default "t").
            source_columns: Columns actually present in the source rows; defaults to
                all target columns. Target columns missing from the source are
                emitted as NULL casts.

        Returns:
            A SQLGlot query selecting every target column (casted) from the
            literal-row subquery.
        """
        source_columns = source_columns or list(target_columns_to_types)
        # NOTE(review): assumes get_source_columns_to_types preserves the source
        # column order — the per-row zip below relies on it; confirm in sqlmesh.utils.
        source_columns_to_types = get_source_columns_to_types(
            target_columns_to_types, source_columns
        )

        row_values = values[batch_start:batch_end]

        inner: exp.Query
        if not row_values:
            # Produce a zero-row subquery with the correct schema
            # (SELECT CAST(NULL AS type) AS col ... WHERE FALSE).
            zero_row_select = exp.select(
                *[
                    exp.cast(exp.null(), to=col_type).as_(col, quoted=True)
                    for col, col_type in source_columns_to_types.items()
                ]
            ).where(exp.false())
            inner = zero_row_select
        else:
            # Build UNION ALL of SELECT <literals AS columns>
            selects: t.List[exp.Select] = []
            for row in row_values:
                # transform_values converts Python values into SQL literal
                # expressions matching the declared column types.
                converted_vals = list(transform_values(row, source_columns_to_types))
                # Positional pairing: row values must align with the ordered
                # source columns.
                select_exprs = [
                    exp.alias_(val, col, quoted=True)
                    for val, col in zip(converted_vals, source_columns_to_types.keys())
                ]
                selects.append(exp.select(*select_exprs))

            # Left-fold into a UNION ALL chain (distinct=False => UNION ALL,
            # preserving duplicate rows).
            inner = selects[0]
            for s in selects[1:]:
                inner = exp.union(inner, s, distinct=False)

        # Outer select to coerce/order target columns; columns absent from the
        # source are filled with NULL casts so the projection always matches the
        # target schema.
        casted_columns = [
            exp.alias_(
                exp.cast(
                    exp.column(column, table=alias, quoted=True)
                    if column in source_columns_to_types
                    else exp.Null(),
                    to=kind,
                ),
                column,
                quoted=True,
            )
            for column, kind in target_columns_to_types.items()
        ]

        # copy=False mutates the select in place rather than deep-copying the AST.
        final_query = exp.select(*casted_columns).from_(
            exp.alias_(exp.Subquery(this=inner), alias, table=True),
            copy=False,
        )

        return final_query
| 681 | + |
562 | 682 | def _create_table_from_columns(
|
563 | 683 | self,
|
564 | 684 | table_name: TableName,
|
|
0 commit comments