From 0abe257f6410e5934022fd4160320eec105a4810 Mon Sep 17 00:00:00 2001 From: Emir Karamehmetoglu Date: Thu, 14 Aug 2025 17:40:28 +0200 Subject: [PATCH 1/2] Add failing replicated persist docs test failure requires `test_replica` cluster to be used. --- .../clickhouse/test_clickhouse_comments.py | 121 +++++++++++++++++- 1 file changed, 120 insertions(+), 1 deletion(-) diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_comments.py b/tests/integration/adapter/clickhouse/test_clickhouse_comments.py index 5179954a..3a0bb93d 100644 --- a/tests/integration/adapter/clickhouse/test_clickhouse_comments.py +++ b/tests/integration/adapter/clickhouse/test_clickhouse_comments.py @@ -18,6 +18,21 @@ """ +ref_models__replicated_table_comment_sql = """ +{{ + config( + materialized = "table", + persist_docs = {"relation": true, "columns": true}, + engine="ReplicatedMergeTree('/clickhouse/tables/{uuid}/one-shard', '{replica}' )" + ) +}} + +select + 'foo' as first_name, + 'bar' as second_name + +""" + ref_models__view_comment_sql = """ {{ config( @@ -43,6 +58,13 @@ description: "XXX first description" - name: second_name description: "XXX second description" + - name: replicated_table_comment + description: "YYY table" + columns: + - name: first_name + description: "XXX first description" + - name: second_name + description: "XXX second description" - name: view_comment description: "YYY view" columns: @@ -59,12 +81,13 @@ def models(self): return { "schema.yml": ref_models__schema_yml, "table_comment.sql": ref_models__table_comment_sql, + "replicated_table_comment.sql": ref_models__replicated_table_comment_sql, "view_comment.sql": ref_models__view_comment_sql, } @pytest.mark.parametrize( 'model_name', - ['table_comment', 'view_comment'], + ["table_comment", "replicated_table_comment", "view_comment"], ) def test_comment(self, project, model_name): if os.environ.get('DBT_CH_TEST_CLOUD', '').lower() in ('1', 'true', 'yes'): @@ -81,3 +104,99 @@ def test_comment(self, project, model_name): assert column_comment.startswith("XXX") assert column_node['metadata']['comment'].startswith("YYY") + + # Ensure comment is propoagated to all replicas on cluster + #cluster = project.test_config['cluster'] + +# local_relation = relation_from_name(project.adapter, model_name) +# if cluster: +# ensure_column_comments_consistent_across_replicas(project, cluster, local_relation) +# ensure_table_comment_on_cluster(project, cluster, local_relation) + + + +# def ensure_table_comment_on_cluster(project, cluster, local_relation): +# """Ensure all replicas have same comment for given relation""" +# # Returns 'ok' if exactly one distinct comment exists across all replicas for this table; otherwise 'mismatch'. +# sql = f""" +# SELECT +# if(COUNT(DISTINCT comment) = 1, 'ok', 'mismatch') AS status +# FROM clusterAllReplicas('{cluster}', system.tables) +# WHERE database = currentDatabase() +# AND name = '{local_relation.identifier}' +# """ +# result = project.run_sql(sql, fetch="one") +# assert result[0] == "ok" + +# sql = f""" +# SELECT +# hostname(), +# comment +# FROM clusterAllReplicas('{cluster}', system.tables) +# WHERE `table` = '{local_relation.identifier}' +# """ + +# result = project.run_sql(sql, fetch="all") + +# for _table_row in result: +# assert _table_row[-1].startswith("YYY") + +# def ensure_column_comments_consistent_across_replicas(project, cluster, local_relation): +# # This query groups by column name and checks that each has exactly one distinct comment across replicas. +# check_sql = f""" +# SELECT +# name AS column_name, +# COUNT(DISTINCT comment) AS distinct_comment_count, +# groupArray((hostName(), comment)) AS per_replica_comments +# FROM clusterAllReplicas('{cluster}', system.columns) +# WHERE database = currentDatabase() +# AND table = '{local_relation.identifier}' +# GROUP BY column_name +# """ +# rows = project.run_sql(check_sql, fetch="all") + +# mismatches = [r for r in rows if r[1] != 1] +# if mismatches: +# print("Column comment mismatches:", mismatches) + +# assert not mismatches + +# sql = f""" +# SELECT +# name, +# groupArray(hostname()) as hosts, +# groupUniqArray(comment) as comments, +# length(comments) as num_comments +# FROM clusterAllReplicas('{cluster}', system.columns) +# WHERE table = '{local_relation.identifier}' +# GROUP BY name +# """ + +# result = project.run_sql(sql, fetch="all") + +# print("WOW"*100) +# print("\n\n") +# print(result) +# print("WOW"*100) + +# for _col in result: +# assert _col[-1] == 1 + +# assert result == [] + + +# sql = f""" +# SELECT +# name, +# count(hostname()) +# FROM clusterAllReplicas('{cluster}', system.columns) +# WHERE table = '{local_relation.identifier}' +# GROUP BY name +# """ +# result = project.run_sql(sql, fetch="all") +# assert result[-1] == NUM_CLUSTER_NODES + +# assert result == [] + + + From 671f7d0df5f8805d7424b433628ea10da2726a25 Mon Sep 17 00:00:00 2001 From: Emir Karamehmetoglu Date: Thu, 14 Aug 2025 12:24:35 +0200 Subject: [PATCH 2/2] fix: Split persist docs into column and table comments The previous version of this macro blended the `MODIFY COMMENT` command for the table comment with the `COMMENT COLUMN` commands for the column comments, inside the same `ALTER TABLE`. This combined syntax **does not work** with `ON CLUSTER`. (This was thought to be a clickhouse bug, but that bug is fixed and the behavior remains.) The revised macro now sends a separate `ALTER TABLE` for the table comment `MODIFY COMMENT`, and a separate one for all the `COMMENT COLUMN` commands. I've split it up into 2 helper macros that accomplish each task for maintainability. This also works on non-cluster setups so there is no need to further complicate it by combining table and column comment alterations into a single ALTER. `clickhouse__persist_docs` now runs the relation (table comment) helper macro if `persist_relations_docs()` is set and descriptions exist for the relation and the column comment helper macro if `persist_column_docs()` and columns are defined in the model. Additionally, the formatting was cleaned up for multiple column comments so that comments for a different column are not appearing on the same line. Just makes it easier to read in clickhouse/dbt logs. Example: It produced comments like: ```sql alter table [ON CLUSTER] modify comment $dbt_comment_literal_block$
$dbt_comment_literal_block$, comment column `col1_name` $dbt_comment_literal_block$ $dbt_comment_lieteral_block$, comment column `col2_name` ... ``` A bit messy, but works on a single node. Although `ON CLUSTER` is correctly added, this syntax is incorrect and ONLY produces table and column comments for the initiator node. Now it does: ```sql alter table
[ON CLUSTER] modify comment $dbt_comment_literal_block$
$dbt_comment_literal_block$ alter table
[ON CLUSTER] comment column `col1_name` $dbt_comment_literal_block$$dbt_comment_literal_block$, comment column `col2_name` $dbt_comment$ $dbt_comment$, ... ``` --- .../clickhouse/macros/persist_docs.sql | 48 ++++++---- .../clickhouse/test_clickhouse_comments.py | 96 ------------------- 2 files changed, 32 insertions(+), 112 deletions(-) diff --git a/dbt/include/clickhouse/macros/persist_docs.sql b/dbt/include/clickhouse/macros/persist_docs.sql index 442ae2be..f32be890 100644 --- a/dbt/include/clickhouse/macros/persist_docs.sql +++ b/dbt/include/clickhouse/macros/persist_docs.sql @@ -10,29 +10,45 @@ alter table {{ relation }} {{ on_cluster_clause(relation) }} modify comment '{{ comment }}' {% endmacro %} -{% macro clickhouse__persist_docs(relation, model, for_relation, for_columns) %} - {%- set alter_comments = [] %} - +{% macro clickhouse__persist_docs(relation, model, for_relation, for_columns) -%} + {# Persist table comment if enabled and description provided #} {%- if for_relation and config.persist_relation_docs() and model.description -%} - {% set escaped_comment = clickhouse_escape_comment(model.description) %} - {% do alter_comments.append("modify comment {comment}".format(comment=escaped_comment)) %} + {{ _persist_table_comment(relation, model.description) }} {%- endif -%} + {#- Persist column comments if enabled and columns defined -#} {%- if for_columns and config.persist_column_docs() and model.columns -%} - {% set existing_columns = adapter.get_columns_in_relation(relation) | map(attribute="name") | list %} - {% for column_name in model.columns if (column_name in existing_columns) %} - {%- set comment = model.columns[column_name]['description'] -%} - {%- if comment %} - {% set escaped_comment = clickhouse_escape_comment(comment) %} - {% do alter_comments.append("comment column `{column_name}` {comment}".format(column_name=column_name, comment=escaped_comment)) %} - {%- endif %} - {%- endfor -%} + {{ _persist_column_comments(relation, model.columns) }} {%- endif -%} +{%- endmacro %} + +{#- Helper macro: persist the table comment for a ClickHouse relation. -#} +{% macro _persist_table_comment(relation, description) -%} + {#- Escape the description to be safe for ClickHouse #} + {%- set escaped_comment = clickhouse_escape_comment(description) -%} + {#- Build and run the ALTER TABLE ... MODIFY COMMENT statement -#} + {%- set sql = "modify comment {comment}".format(comment=escaped_comment) -%} + {{ run_query(one_alter_relation(relation, sql)) }} +{%- endmacro %} - {%- if alter_comments | length > 0 -%} - {% do run_query(one_alter_relation(relation, alter_comments|join(', '))) %} +{#- Helper macro: persist comments for multiple columns on a ClickHouse table. + relation: target table relation + columns: dict mapping column names to metadata (including 'description') -#} +{% macro _persist_column_comments(relation, columns) -%} + {#- Gather existing columns in the relation to avoid altering non-existent ones -#} + {%- set existing_columns = adapter.get_columns_in_relation(relation) | map(attribute="name") | list -%} + {#- Collect ALTER statements for each column with a description -#} + {%- set alterations = [] -%} + {%- for column_name, info in columns.items() if info.description and column_name in existing_columns -%} + {%- set escaped_comment = clickhouse_escape_comment(info.description) -%} + {%- do alterations.append("\ncomment column `{column_name}` {comment}".format(column_name=column_name, comment=escaped_comment)) -%} + {%- endfor -%} + {#- Execute a single ALTER TABLE statement for all column comments -#} + {%- if alterations -%} + {{ run_query(one_alter_relation(relation, alterations | join(", "))) }} {%- endif -%} -{% endmacro %} +{%- endmacro %} + {# By using dollar-quoting like this, users can embed anything they want into their comments diff --git a/tests/integration/adapter/clickhouse/test_clickhouse_comments.py b/tests/integration/adapter/clickhouse/test_clickhouse_comments.py index 3a0bb93d..d82224fc 100644 --- a/tests/integration/adapter/clickhouse/test_clickhouse_comments.py +++ b/tests/integration/adapter/clickhouse/test_clickhouse_comments.py @@ -104,99 +104,3 @@ def test_comment(self, project, model_name): assert column_comment.startswith("XXX") assert column_node['metadata']['comment'].startswith("YYY") - - # Ensure comment is propoagated to all replicas on cluster - #cluster = project.test_config['cluster'] - -# local_relation = relation_from_name(project.adapter, model_name) -# if cluster: -# ensure_column_comments_consistent_across_replicas(project, cluster, local_relation) -# ensure_table_comment_on_cluster(project, cluster, local_relation) - - - -# def ensure_table_comment_on_cluster(project, cluster, local_relation): -# """Ensure all replicas have same comment for given relation""" -# # Returns 'ok' if exactly one distinct comment exists across all replicas for this table; otherwise 'mismatch'. -# sql = f""" -# SELECT -# if(COUNT(DISTINCT comment) = 1, 'ok', 'mismatch') AS status -# FROM clusterAllReplicas('{cluster}', system.tables) -# WHERE database = currentDatabase() -# AND name = '{local_relation.identifier}' -# """ -# result = project.run_sql(sql, fetch="one") -# assert result[0] == "ok" - -# sql = f""" -# SELECT -# hostname(), -# comment -# FROM clusterAllReplicas('{cluster}', system.tables) -# WHERE `table` = '{local_relation.identifier}' -# """ - -# result = project.run_sql(sql, fetch="all") - -# for _table_row in result: -# assert _table_row[-1].startswith("YYY") - -# def ensure_column_comments_consistent_across_replicas(project, cluster, local_relation): -# # This query groups by column name and checks that each has exactly one distinct comment across replicas. -# check_sql = f""" -# SELECT -# name AS column_name, -# COUNT(DISTINCT comment) AS distinct_comment_count, -# groupArray((hostName(), comment)) AS per_replica_comments -# FROM clusterAllReplicas('{cluster}', system.columns) -# WHERE database = currentDatabase() -# AND table = '{local_relation.identifier}' -# GROUP BY column_name -# """ -# rows = project.run_sql(check_sql, fetch="all") - -# mismatches = [r for r in rows if r[1] != 1] -# if mismatches: -# print("Column comment mismatches:", mismatches) - -# assert not mismatches - -# sql = f""" -# SELECT -# name, -# groupArray(hostname()) as hosts, -# groupUniqArray(comment) as comments, -# length(comments) as num_comments -# FROM clusterAllReplicas('{cluster}', system.columns) -# WHERE table = '{local_relation.identifier}' -# GROUP BY name -# """ - -# result = project.run_sql(sql, fetch="all") - -# print("WOW"*100) -# print("\n\n") -# print(result) -# print("WOW"*100) - -# for _col in result: -# assert _col[-1] == 1 - -# assert result == [] - - -# sql = f""" -# SELECT -# name, -# count(hostname()) -# FROM clusterAllReplicas('{cluster}', system.columns) -# WHERE table = '{local_relation.identifier}' -# GROUP BY name -# """ -# result = project.run_sql(sql, fetch="all") -# assert result[-1] == NUM_CLUSTER_NODES - -# assert result == [] - - -