Skip to content

Commit 870675d

Browse files
committed
Merge branch 'doris' of https://github.com/xinge-ji/sqlmesh into doris
2 parents: 150eba2 + 2088603 · commit: 870675d

26 files changed: +661 −76 lines changed

sqlmesh/core/context.py

Lines changed: 5 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -1677,6 +1677,11 @@ def plan_builder(
16771677
end_override_per_model=max_interval_end_per_model,
16781678
console=self.console,
16791679
user_provided_flags=user_provided_flags,
1680+
selected_models={
1681+
dbt_name
1682+
for model in model_selector.expand_model_selections(select_models or "*")
1683+
if (dbt_name := snapshots[model].node.dbt_name)
1684+
},
16801685
explain=explain or False,
16811686
ignore_cron=ignore_cron or False,
16821687
)

sqlmesh/core/environment.py

Lines changed: 2 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -312,6 +312,7 @@ def execute_environment_statements(
312312
start: t.Optional[TimeLike] = None,
313313
end: t.Optional[TimeLike] = None,
314314
execution_time: t.Optional[TimeLike] = None,
315+
selected_models: t.Optional[t.Set[str]] = None,
315316
) -> None:
316317
try:
317318
rendered_expressions = [
@@ -327,6 +328,7 @@ def execute_environment_statements(
327328
execution_time=execution_time,
328329
environment_naming_info=environment_naming_info,
329330
engine_adapter=adapter,
331+
selected_models=selected_models,
330332
)
331333
]
332334
except Exception as e:

sqlmesh/core/node.py

Lines changed: 1 addition & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -199,6 +199,7 @@ class _Node(PydanticModel):
199199
interval_unit_: t.Optional[IntervalUnit] = Field(alias="interval_unit", default=None)
200200
tags: t.List[str] = []
201201
stamp: t.Optional[str] = None
202+
dbt_name: t.Optional[str] = None # dbt node name
202203
_path: t.Optional[Path] = None
203204
_data_hash: t.Optional[str] = None
204205
_metadata_hash: t.Optional[str] = None

sqlmesh/core/plan/builder.py

Lines changed: 3 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -129,6 +129,7 @@ def __init__(
129129
end_override_per_model: t.Optional[t.Dict[str, datetime]] = None,
130130
console: t.Optional[PlanBuilderConsole] = None,
131131
user_provided_flags: t.Optional[t.Dict[str, UserProvidedFlags]] = None,
132+
selected_models: t.Optional[t.Set[str]] = None,
132133
):
133134
self._context_diff = context_diff
134135
self._no_gaps = no_gaps
@@ -169,6 +170,7 @@ def __init__(
169170
self._console = console or get_console()
170171
self._choices: t.Dict[SnapshotId, SnapshotChangeCategory] = {}
171172
self._user_provided_flags = user_provided_flags
173+
self._selected_models = selected_models
172174
self._explain = explain
173175

174176
self._start = start
@@ -347,6 +349,7 @@ def build(self) -> Plan:
347349
ensure_finalized_snapshots=self._ensure_finalized_snapshots,
348350
ignore_cron=self._ignore_cron,
349351
user_provided_flags=self._user_provided_flags,
352+
selected_models=self._selected_models,
350353
)
351354
self._latest_plan = plan
352355
return plan

sqlmesh/core/plan/definition.py

Lines changed: 4 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -70,6 +70,8 @@ class Plan(PydanticModel, frozen=True):
7070
execution_time_: t.Optional[TimeLike] = Field(default=None, alias="execution_time")
7171

7272
user_provided_flags: t.Optional[t.Dict[str, UserProvidedFlags]] = None
73+
selected_models: t.Optional[t.Set[str]] = None
74+
"""Models that have been selected for this plan (used for dbt selected_resources)"""
7375

7476
@cached_property
7577
def start(self) -> TimeLike:
@@ -282,6 +284,7 @@ def to_evaluatable(self) -> EvaluatablePlan:
282284
},
283285
environment_statements=self.context_diff.environment_statements,
284286
user_provided_flags=self.user_provided_flags,
287+
selected_models=self.selected_models,
285288
)
286289

287290
@cached_property
@@ -319,6 +322,7 @@ class EvaluatablePlan(PydanticModel):
319322
disabled_restatement_models: t.Set[str]
320323
environment_statements: t.Optional[t.List[EnvironmentStatements]] = None
321324
user_provided_flags: t.Optional[t.Dict[str, UserProvidedFlags]] = None
325+
selected_models: t.Optional[t.Set[str]] = None
322326

323327
def is_selected_for_backfill(self, model_fqn: str) -> bool:
324328
return self.models_to_backfill is None or model_fqn in self.models_to_backfill

sqlmesh/core/plan/evaluator.py

Lines changed: 3 additions & 0 deletions
Original file line number · Diff line number · Diff line change
@@ -137,6 +137,7 @@ def visit_before_all_stage(self, stage: stages.BeforeAllStage, plan: Evaluatable
137137
start=plan.start,
138138
end=plan.end,
139139
execution_time=plan.execution_time,
140+
selected_models=plan.selected_models,
140141
)
141142

142143
def visit_after_all_stage(self, stage: stages.AfterAllStage, plan: EvaluatablePlan) -> None:
@@ -150,6 +151,7 @@ def visit_after_all_stage(self, stage: stages.AfterAllStage, plan: EvaluatablePl
150151
start=plan.start,
151152
end=plan.end,
152153
execution_time=plan.execution_time,
154+
selected_models=plan.selected_models,
153155
)
154156

155157
def visit_create_snapshot_records_stage(
@@ -257,6 +259,7 @@ def visit_backfill_stage(self, stage: stages.BackfillStage, plan: EvaluatablePla
257259
allow_destructive_snapshots=plan.allow_destructive_models,
258260
allow_additive_snapshots=plan.allow_additive_models,
259261
selected_snapshot_ids=stage.selected_snapshot_ids,
262+
selected_models=plan.selected_models,
260263
)
261264
if errors:
262265
raise PlanError("Plan application failed.")

sqlmesh/core/renderer.py

Lines changed: 26 additions & 18 deletions
Original file line number · Diff line number · Diff line change
@@ -30,7 +30,7 @@
3030
SQLMeshError,
3131
raise_config_error,
3232
)
33-
from sqlmesh.utils.jinja import JinjaMacroRegistry
33+
from sqlmesh.utils.jinja import JinjaMacroRegistry, extract_error_details
3434
from sqlmesh.utils.metaprogramming import Executable, prepare_env
3535

3636
if t.TYPE_CHECKING:
@@ -221,36 +221,44 @@ def _resolve_table(table: str | exp.Table) -> str:
221221
dialect=self._dialect, identify=True, comments=False
222222
)
223223

224-
all_refs = list(
225-
self._jinja_macro_registry.global_objs.get("sources", {}).values() # type: ignore
226-
) + list(
227-
self._jinja_macro_registry.global_objs.get("refs", {}).values() # type: ignore
228-
)
229-
for ref in all_refs:
230-
if ref.event_time_filter:
231-
ref.event_time_filter["start"] = render_kwargs["start_tstz"]
232-
ref.event_time_filter["end"] = to_tstz(
233-
make_ts_exclusive(render_kwargs["end_tstz"], dialect=self._dialect)
234-
)
224+
if self._model and self._model.kind.is_incremental_by_time_range:
225+
all_refs = list(
226+
self._jinja_macro_registry.global_objs.get("sources", {}).values() # type: ignore
227+
) + list(
228+
self._jinja_macro_registry.global_objs.get("refs", {}).values() # type: ignore
229+
)
230+
for ref in all_refs:
231+
if ref.event_time_filter:
232+
ref.event_time_filter["start"] = render_kwargs["start_tstz"]
233+
ref.event_time_filter["end"] = to_tstz(
234+
make_ts_exclusive(render_kwargs["end_tstz"], dialect=self._dialect)
235+
)
236+
235237
jinja_env = self._jinja_macro_registry.build_environment(**jinja_env_kwargs)
236238

237239
expressions = []
238240
rendered_expression = jinja_env.from_string(self._expression.name).render()
239241
logger.debug(
240242
f"Rendered Jinja expression for model '{self._model_fqn}' at '{self._path}': '{rendered_expression}'"
241243
)
242-
if rendered_expression.strip():
243-
expressions = [e for e in parse(rendered_expression, read=self._dialect) if e]
244-
245-
if not expressions:
246-
raise ConfigError(f"Failed to parse an expression:\n{self._expression}")
247244
except ParsetimeAdapterCallError:
248245
raise
249246
except Exception as ex:
250247
raise ConfigError(
251-
f"Could not render or parse jinja at '{self._path}'.\n{ex}"
248+
f"Could not render jinja for '{self._path}'.\n" + extract_error_details(ex)
252249
) from ex
253250

251+
if rendered_expression.strip():
252+
try:
253+
expressions = [e for e in parse(rendered_expression, read=self._dialect) if e]
254+
255+
if not expressions:
256+
raise ConfigError(f"Failed to parse an expression:\n{self._expression}")
257+
except Exception as ex:
258+
raise ConfigError(
259+
f"Could not parse the rendered jinja at '{self._path}'.\n{ex}"
260+
) from ex
261+
254262
if this_model:
255263
render_kwargs["this_model"] = this_model
256264

sqlmesh/core/scheduler.py

Lines changed: 47 additions & 15 deletions
Original file line number · Diff line number · Diff line change
@@ -416,6 +416,7 @@ def run_merged_intervals(
416416
start: t.Optional[TimeLike] = None,
417417
end: t.Optional[TimeLike] = None,
418418
allow_destructive_snapshots: t.Optional[t.Set[str]] = None,
419+
selected_models: t.Optional[t.Set[str]] = None,
419420
allow_additive_snapshots: t.Optional[t.Set[str]] = None,
420421
selected_snapshot_ids: t.Optional[t.Set[SnapshotId]] = None,
421422
run_environment_statements: bool = False,
@@ -445,7 +446,13 @@ def run_merged_intervals(
445446
if not selected_snapshots:
446447
selected_snapshots = list(merged_intervals)
447448

448-
snapshot_dag = snapshots_to_dag(selected_snapshots)
449+
# Build the full DAG from all snapshots to preserve transitive dependencies
450+
full_dag = snapshots_to_dag(self.snapshots.values())
451+
452+
# Create a subdag that includes the selected snapshots and all their upstream dependencies
453+
# This ensures that transitive dependencies are preserved even when intermediate nodes are not selected
454+
selected_snapshot_ids_set = {s.snapshot_id for s in selected_snapshots}
455+
snapshot_dag = full_dag.subdag(*selected_snapshot_ids_set)
449456

450457
batched_intervals = self.batch_intervals(
451458
merged_intervals, deployability_index, environment_naming_info, dag=snapshot_dag
@@ -472,6 +479,7 @@ def run_merged_intervals(
472479
start=start,
473480
end=end,
474481
execution_time=execution_time,
482+
selected_models=selected_models,
475483
)
476484

477485
# We only need to create physical tables if the snapshot is not representative or if it
@@ -533,6 +541,7 @@ def run_node(node: SchedulingUnit) -> None:
533541
allow_destructive_snapshots=allow_destructive_snapshots,
534542
allow_additive_snapshots=allow_additive_snapshots,
535543
target_table_exists=snapshot.snapshot_id not in snapshots_to_create,
544+
selected_models=selected_models,
536545
)
537546

538547
evaluation_duration_ms = now_timestamp() - execution_start_ts
@@ -602,6 +611,7 @@ def run_node(node: SchedulingUnit) -> None:
602611
start=start,
603612
end=end,
604613
execution_time=execution_time,
614+
selected_models=selected_models,
605615
)
606616

607617
self.state_sync.recycle()
@@ -642,20 +652,11 @@ def _dag(
642652
upstream_dependencies: t.List[SchedulingUnit] = []
643653

644654
for p_sid in snapshot.parents:
645-
if p_sid in self.snapshots:
646-
p_intervals = intervals_per_snapshot.get(p_sid.name, [])
647-
648-
if not p_intervals and p_sid in original_snapshots_to_create:
649-
upstream_dependencies.append(CreateNode(snapshot_name=p_sid.name))
650-
elif len(p_intervals) > 1:
651-
upstream_dependencies.append(DummyNode(snapshot_name=p_sid.name))
652-
else:
653-
for i, interval in enumerate(p_intervals):
654-
upstream_dependencies.append(
655-
EvaluateNode(
656-
snapshot_name=p_sid.name, interval=interval, batch_index=i
657-
)
658-
)
655+
upstream_dependencies.extend(
656+
self._find_upstream_dependencies(
657+
p_sid, intervals_per_snapshot, original_snapshots_to_create
658+
)
659+
)
659660

660661
batch_concurrency = snapshot.node.batch_concurrency
661662
batch_size = snapshot.node.batch_size
@@ -699,6 +700,36 @@ def _dag(
699700
)
700701
return dag
701702

703+
def _find_upstream_dependencies(
704+
self,
705+
parent_sid: SnapshotId,
706+
intervals_per_snapshot: t.Dict[str, Intervals],
707+
snapshots_to_create: t.Set[SnapshotId],
708+
) -> t.List[SchedulingUnit]:
709+
if parent_sid not in self.snapshots:
710+
return []
711+
712+
p_intervals = intervals_per_snapshot.get(parent_sid.name, [])
713+
714+
if p_intervals:
715+
if len(p_intervals) > 1:
716+
return [DummyNode(snapshot_name=parent_sid.name)]
717+
interval = p_intervals[0]
718+
return [EvaluateNode(snapshot_name=parent_sid.name, interval=interval, batch_index=0)]
719+
if parent_sid in snapshots_to_create:
720+
return [CreateNode(snapshot_name=parent_sid.name)]
721+
# This snapshot has no intervals and doesn't need creation which means
722+
# that it can be a transitive dependency
723+
transitive_deps: t.List[SchedulingUnit] = []
724+
parent_snapshot = self.snapshots[parent_sid]
725+
for grandparent_sid in parent_snapshot.parents:
726+
transitive_deps.extend(
727+
self._find_upstream_dependencies(
728+
grandparent_sid, intervals_per_snapshot, snapshots_to_create
729+
)
730+
)
731+
return transitive_deps
732+
702733
def _run_or_audit(
703734
self,
704735
environment: str | EnvironmentNamingInfo,
@@ -808,6 +839,7 @@ def _run_or_audit(
808839
run_environment_statements=run_environment_statements,
809840
audit_only=audit_only,
810841
auto_restatement_triggers=auto_restatement_triggers,
842+
selected_models={s.node.dbt_name for s in merged_intervals if s.node.dbt_name},
811843
)
812844

813845
return CompletionStatus.FAILURE if errors else CompletionStatus.SUCCESS

sqlmesh/core/snapshot/evaluator.py

Lines changed: 7 additions & 2 deletions
Original file line number · Diff line number · Diff line change
@@ -50,7 +50,7 @@
5050
CustomKind,
5151
)
5252
from sqlmesh.core.model.kind import _Incremental
53-
from sqlmesh.utils import CompletionStatus
53+
from sqlmesh.utils import CompletionStatus, columns_to_types_all_known
5454
from sqlmesh.core.schema_diff import (
5555
has_drop_alteration,
5656
TableAlterOperation,
@@ -747,6 +747,11 @@ def _evaluate_snapshot(
747747
adapter.execute(model.render_pre_statements(**render_statements_kwargs))
748748

749749
if not target_table_exists or (model.is_seed and not snapshot.intervals):
750+
columns_to_types_provided = (
751+
model.kind.is_materialized
752+
and model.columns_to_types_
753+
and columns_to_types_all_known(model.columns_to_types_)
754+
)
750755
if self._can_clone(snapshot, deployability_index):
751756
self._clone_snapshot_in_dev(
752757
snapshot=snapshot,
@@ -759,7 +764,7 @@ def _evaluate_snapshot(
759764
)
760765
runtime_stage = RuntimeStage.EVALUATING
761766
target_table_exists = True
762-
elif model.annotated or model.is_seed or model.kind.is_scd_type_2:
767+
elif columns_to_types_provided or model.is_seed or model.kind.is_scd_type_2:
763768
self._execute_create(
764769
snapshot=snapshot,
765770
table_name=target_table_name,

sqlmesh/dbt/adapter.py

Lines changed: 2 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -168,7 +168,8 @@ def compare_dbr_version(self, major: int, minor: int) -> int:
168168

169169
@property
170170
def graph(self) -> t.Any:
171-
return AttributeDict(
171+
flat_graph = self.jinja_globals.get("flat_graph", None)
172+
return flat_graph or AttributeDict(
172173
{
173174
"exposures": {},
174175
"groups": {},
@@ -276,10 +277,6 @@ def __init__(
276277
**table_mapping,
277278
}
278279

279-
@property
280-
def graph(self) -> t.Any:
281-
return self.jinja_globals.get("flat_graph", super().graph)
282-
283280
def get_relation(
284281
self, database: t.Optional[str], schema: str, identifier: str
285282
) -> t.Optional[BaseRelation]:

0 commit comments

Comments (0)