From e370453dc31270bb3345eae31a588ae6ad03211c Mon Sep 17 00:00:00 2001 From: Kaushik Iska Date: Wed, 30 Jul 2025 09:19:48 -0500 Subject: [PATCH 1/2] Add information around publication creation for postgres and its implications on billing --- .../clickpipes/postgres/source/aurora.md | 8 +- .../source/azure-flexible-server-postgres.md | 8 +- .../postgres/source/crunchy-postgres.md | 8 +- .../clickpipes/postgres/source/generic.md | 4 +- .../postgres/source/google-cloudsql.md | 8 +- .../postgres/source/neon-postgres.md | 8 +- .../postgres/source/publication-management.md | 157 ++++++++++++++++++ .../clickpipes/postgres/source/rds.md | 8 +- .../clickpipes/postgres/source/supabase.md | 8 +- 9 files changed, 209 insertions(+), 8 deletions(-) create mode 100644 docs/integrations/data-ingestion/clickpipes/postgres/source/publication-management.md diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/aurora.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/aurora.md index 2101871b31c..5e7ccb7459e 100644 --- a/docs/integrations/data-ingestion/clickpipes/postgres/source/aurora.md +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/aurora.md @@ -89,9 +89,15 @@ Connect to your Aurora PostgreSQL writer instance as an admin user and execute t 4. Create a publication for replication: ```sql - CREATE PUBLICATION clickpipes_publication FOR ALL TABLES; + CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2; ``` + :::note + + For detailed information about publication management, including automatic creation, table-specific vs all-tables publications, and billing implications, see our [Publication Management Guide](./publication-management.md). 
+ + ::: + ## Configure network access {#configure-network-access} ### IP-based access control {#ip-based-access-control} diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/azure-flexible-server-postgres.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/azure-flexible-server-postgres.md index 0704d7ed828..ab46a7ff753 100644 --- a/docs/integrations/data-ingestion/clickpipes/postgres/source/azure-flexible-server-postgres.md +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/azure-flexible-server-postgres.md @@ -58,9 +58,15 @@ Connect to your Azure Flexible Server Postgres through the admin user and run th 4. Create publication that you'll be using for creating the MIRROR (replication) in future. ```sql - CREATE PUBLICATION clickpipes_publication FOR ALL TABLES; + CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2; ``` + :::note + + For detailed information about publication management, including automatic creation, table-specific vs all-tables publications, and billing implications, see our [Publication Management Guide](./publication-management.md). + + ::: + 5. Set `wal_sender_timeout` to 0 for `clickpipes_user` ```sql diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/crunchy-postgres.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/crunchy-postgres.md index aa5cbf6e155..3669f0ce0b6 100644 --- a/docs/integrations/data-ingestion/clickpipes/postgres/source/crunchy-postgres.md +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/crunchy-postgres.md @@ -50,9 +50,15 @@ Connect to your Crunchy Bridge Postgres through the `postgres` user and run the 4. Create publication that you'll be using for creating the MIRROR (replication) in future. 
```sql - CREATE PUBLICATION clickpipes_publication FOR ALL TABLES; + CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2; ``` + :::note + + For detailed information about publication management, including automatic creation, table-specific vs all-tables publications, and billing implications, see our [Publication Management Guide](./publication-management.md). + + ::: + ## Safe list ClickPipes IPs {#safe-list-clickpipes-ips} Safelist [ClickPipes IPs](../../index.md#list-of-static-ips) by adding the Firewall Rules in Crunchy Bridge. diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/generic.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/generic.md index 3cc69865841..10754e5524e 100644 --- a/docs/integrations/data-ingestion/clickpipes/postgres/source/generic.md +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/generic.md @@ -66,12 +66,14 @@ For this, you can connect to your Postgres instance and run the following SQL co ALTER USER clickpipes_user REPLICATION; -- Create a publication. We will use this when creating the pipe - CREATE PUBLICATION clickpipes_publication FOR ALL TABLES; + CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2; ``` :::note Make sure to replace `clickpipes_user` and `clickpipes_password` with your desired username and password. +For detailed information about publication management, including automatic creation, table-specific vs all-tables publications, and billing implications, see our [Publication Management Guide](./publication-management.md). 
+ ::: ## Enabling connections in pg_hba.conf to the ClickPipes User {#enabling-connections-in-pg_hbaconf-to-the-clickpipes-user} diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/google-cloudsql.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/google-cloudsql.md index bc535d6e9d1..228c1256bf2 100644 --- a/docs/integrations/data-ingestion/clickpipes/postgres/source/google-cloudsql.md +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/google-cloudsql.md @@ -68,9 +68,15 @@ Connect to your Cloud SQL Postgres through the admin user and run the below comm 4. Create publication that you'll be using for creating the MIRROR (replication) in future. ```sql - CREATE PUBLICATION clickpipes_publication FOR ALL TABLES; + CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2; ``` + :::note + + For detailed information about publication management, including automatic creation, table-specific vs all-tables publications, and billing implications, see our [Publication Management Guide](./publication-management.md). + + ::: + [//]: # (TODO Add SSH Tunneling) ## Add ClickPipes IPs to Firewall {#add-clickpipes-ips-to-firewall} diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/neon-postgres.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/neon-postgres.md index 8d332071e7f..4680fff8567 100644 --- a/docs/integrations/data-ingestion/clickpipes/postgres/source/neon-postgres.md +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/neon-postgres.md @@ -35,11 +35,17 @@ Here, we can run the following SQL commands: ALTER USER clickpipes_user REPLICATION; -- Create a publication. 
We will use this when creating the mirror - CREATE PUBLICATION clickpipes_publication FOR ALL TABLES; + CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2; ``` User and publication commands +:::note + +For detailed information about publication management, including automatic creation, table-specific vs all-tables publications, and billing implications, see our [Publication Management Guide](./publication-management.md). + +::: + Click on **Run** to have a publication and a user ready. ## Enable logical replication {#enable-logical-replication} diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/publication-management.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/publication-management.md new file mode 100644 index 00000000000..7de7bdfda48 --- /dev/null +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/publication-management.md @@ -0,0 +1,157 @@ +--- +sidebar_label: 'Publication Management' +description: 'Best practices for managing PostgreSQL publications with ClickPipes' +slug: /integrations/clickpipes/postgres/source/publication-management +title: 'PostgreSQL Publication Management for ClickPipes' +--- + +# PostgreSQL Publication Management for ClickPipes + +This guide covers best practices and considerations for managing PostgreSQL publications when using ClickPipes for data replication. + +## Publication Creation Options {#publication-creation-options} + +You have two main approaches for managing publications with ClickPipes: + +### Option 1: Automatic Publication Creation (Recommended for Simplicity) {#automatic-publication-creation} + +If you **don't specify a publication** during ClickPipe creation, ClickPipes will automatically create a publication scoped to only the tables you select for replication. 
+ +**Requirements:** +- The ClickPipes user must have **table owner permissions** for all tables you want to replicate +- This is the easiest approach for getting started + +**Advantages:** +- No manual publication management required +- Publication is automatically scoped to only selected tables +- No risk of billing for unwanted table data + +**Disadvantages:** +- Requires higher privileges (table owner permissions) +- If you add tables to the pipe later, ClickPipes will need to modify the publication, requiring continued owner permissions + +### Option 2: Manual Publication Creation (Recommended for Production) {#manual-publication-creation} + +Create and manage the publication yourself before setting up the ClickPipe. + +#### Table-Specific Publication (Recommended) {#table-specific-publication} + +```sql +-- Create a publication for specific tables you wish to replicate +-- Replace 'schema.table1', 'schema.table2' with your actual schema and table names +CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2; +``` + +**Advantages:** +- Fine-grained control over which tables are included +- Only requires SELECT permissions on tables (not ownership) +- Clear visibility into what data will be replicated +- Optimal billing - only pay for data you actually need + +**Disadvantages:** +- Manual management required +- Must update publication when adding/removing tables from replication + +#### All-Tables Publication (Use with Caution) {#all-tables-publication} + +```sql +-- Creates publication for all current and future tables +CREATE PUBLICATION clickpipes_publication FOR ALL TABLES; +``` + +**Important Considerations:** +- Though ClickPipes will only replicate tables you select in the UI, PostgreSQL will send data changes from **all tables** over the network +- **You will be billed for the extra bytes** from tables you don't actually need +- This approach is generally not recommended for production use + +## Permission Requirements 
{#permission-requirements} + +### For Automatic Publication Creation {#automatic-permissions} +```sql +-- Grant table ownership (required for automatic publication creation) +ALTER TABLE schema.table1 OWNER TO clickpipes_user; +ALTER TABLE schema.table2 OWNER TO clickpipes_user; +``` + +### For Manual Publication Creation {#manual-permissions} +```sql +-- Minimal permissions (sufficient for manual publication approach) +GRANT USAGE ON SCHEMA "your_schema" TO clickpipes_user; +GRANT SELECT ON TABLE schema.table1, schema.table2 TO clickpipes_user; +ALTER USER clickpipes_user REPLICATION; + +-- If you need to grant permissions for future tables in the schema: +ALTER DEFAULT PRIVILEGES IN SCHEMA "your_schema" GRANT SELECT ON TABLES TO clickpipes_user; +``` + +## Managing Publications Over Time {#managing-publications-over-time} + +### Adding Tables to an Existing Publication {#adding-tables} + +If you need to add tables to your ClickPipe later: + +```sql +-- Add a single table +ALTER PUBLICATION clickpipes_publication ADD TABLE schema.new_table; + +-- Add multiple tables +ALTER PUBLICATION clickpipes_publication ADD TABLE schema.table3, schema.table4; +``` + +### Removing Tables from a Publication {#removing-tables} + +```sql +-- Remove a single table +ALTER PUBLICATION clickpipes_publication DROP TABLE schema.old_table; + +-- Remove multiple tables +ALTER PUBLICATION clickpipes_publication DROP TABLE schema.table3, schema.table4; +``` + +### Viewing Publication Contents {#viewing-publication-contents} + +```sql +-- See which tables are in your publication +SELECT schemaname, tablename +FROM pg_publication_tables +WHERE pubname = 'clickpipes_publication'; + +-- See all publications +SELECT * FROM pg_publication; +``` + +## Best Practices {#best-practices} + +1. **Start with table-specific publications** for production workloads to maintain cost control and clarity + +2. 
**Use automatic creation for development/testing** when you want to get started quickly and don't mind granting higher privileges + +3. **Plan your table selection carefully** - adding/removing tables later requires publication changes + +4. **Monitor your data usage** to ensure you're only paying for data you actually need + +5. **Document your publication strategy** so team members understand the setup + +## Billing Implications {#billing-implications} + +- **Table-specific publications**: You pay only for data changes from explicitly listed tables +- **All-tables publications**: You pay for data changes from all tables in the database, even if ClickPipes only processes a subset +- **Automatic creation**: You pay only for data from selected tables (most cost-effective) + +## Additional Resources {#additional-resources} + +For more advanced publication options and syntax, see the [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-createpublication.html). + +## Troubleshooting {#troubleshooting} + +### Permission Errors {#permission-errors} +If you encounter permission errors: +1. Verify the ClickPipes user has the required permissions for your chosen approach +2. Check that the publication exists and contains the expected tables +3. Ensure the user has REPLICATION privileges + +### Unexpected Billing {#unexpected-billing} +If you're seeing higher than expected data transfer costs: +1. Check if you're using an all-tables publication unintentionally +2. Verify your publication only contains the tables you need +3. 
Monitor which tables are actively changing in your database \ No newline at end of file diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/rds.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/rds.md index 8ced731ee65..90b35d30d22 100644 --- a/docs/integrations/data-ingestion/clickpipes/postgres/source/rds.md +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/rds.md @@ -89,9 +89,15 @@ Connect to your RDS Postgres instance as an admin user and execute the following 4. Create a publication for replication: ```sql - CREATE PUBLICATION clickpipes_publication FOR ALL TABLES; + CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2; ``` + :::note + + For detailed information about publication management, including automatic creation, table-specific vs all-tables publications, and billing implications, see our [Publication Management Guide](./publication-management.md). + + ::: + ## Configure network access {#configure-network-access} ### IP-based access control {#ip-based-access-control} diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/supabase.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/supabase.md index cb750c42544..a2eb2cbf645 100644 --- a/docs/integrations/data-ingestion/clickpipes/postgres/source/supabase.md +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/supabase.md @@ -36,11 +36,17 @@ Here, we can run the following SQL commands: ALTER USER clickpipes_user REPLICATION; -- Create a publication. 
We will use this when creating the mirror - CREATE PUBLICATION clickpipes_publication FOR ALL TABLES; + CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2; ``` User and publication commands +:::note + +For detailed information about publication management, including automatic creation, table-specific vs all-tables publications, and billing implications, see our [Publication Management Guide](./publication-management.md). + +::: + Click on **Run** to have a publication and a user ready. :::note From 79e51a9a217629704c78169f3b2c8537d6a3e942 Mon Sep 17 00:00:00 2001 From: Kaushik Iska Date: Wed, 30 Jul 2025 09:28:55 -0500 Subject: [PATCH 2/2] fix things --- .../postgres/source/publication-management.md | 24 ++++++++++--------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/docs/integrations/data-ingestion/clickpipes/postgres/source/publication-management.md b/docs/integrations/data-ingestion/clickpipes/postgres/source/publication-management.md index 7de7bdfda48..db07f4b6844 100644 --- a/docs/integrations/data-ingestion/clickpipes/postgres/source/publication-management.md +++ b/docs/integrations/data-ingestion/clickpipes/postgres/source/publication-management.md @@ -13,18 +13,18 @@ This guide covers best practices and considerations for managing PostgreSQL publ You have two main approaches for managing publications with ClickPipes: -### Option 1: Automatic Publication Creation (Recommended for Simplicity) {#automatic-publication-creation} +### Option 1: Automatic Publication Creation (Let ClickPipes Manage It) {#automatic-publication-creation} -If you **don't specify a publication** during ClickPipe creation, ClickPipes will automatically create a publication scoped to only the tables you select for replication. +If you want ClickPipes to handle publication management and not worry about the intricacies of PostgreSQL publications, simply **don't specify a publication** during ClickPipe creation. 
ClickPipes will automatically create a publication scoped to only the tables you select for replication. **Requirements:** - The ClickPipes user must have **table owner permissions** for all tables you want to replicate -- This is the easiest approach for getting started **Advantages:** - No manual publication management required - Publication is automatically scoped to only selected tables -- No risk of billing for unwanted table data +- Same cost-effective billing as table-specific publications (only pay for selected tables) +- Easiest approach for getting started **Disadvantages:** - Requires higher privileges (table owner permissions) @@ -46,7 +46,7 @@ CREATE PUBLICATION clickpipes_publication FOR TABLE schema.table1, schema.table2 - Fine-grained control over which tables are included - Only requires SELECT permissions on tables (not ownership) - Clear visibility into what data will be replicated -- Optimal billing - only pay for data you actually need +- Same cost-effective billing as automatic creation (only pay for selected tables) **Disadvantages:** - Manual management required @@ -122,21 +122,23 @@ SELECT * FROM pg_publication; ## Best Practices {#best-practices} -1. **Start with table-specific publications** for production workloads to maintain cost control and clarity +1. **Choose either automatic creation or table-specific publications** for production workloads - both provide the same cost control, with automatic being easier and manual providing more control -2. **Use automatic creation for development/testing** when you want to get started quickly and don't mind granting higher privileges +2. **Use automatic creation** when you want ClickPipes to handle publication management and don't mind granting table owner privileges 3. **Plan your table selection carefully** - adding/removing tables later requires publication changes -4. **Monitor your data usage** to ensure you're only paying for data you actually need +4. 
**Avoid all-tables publications** unless you specifically need all tables - this is the only approach that increases costs -5. **Document your publication strategy** so team members understand the setup +5. **Monitor your data usage** to ensure you're only paying for data you actually need + +6. **Document your publication strategy** so team members understand the setup ## Billing Implications {#billing-implications} - **Table-specific publications**: You pay only for data changes from explicitly listed tables -- **All-tables publications**: You pay for data changes from all tables in the database, even if ClickPipes only processes a subset -- **Automatic creation**: You pay only for data from selected tables (most cost-effective) +- **Automatic creation**: You pay only for data changes from selected tables (same cost as table-specific) +- **All-tables publications**: You pay for data changes from **all tables** in the database, even if ClickPipes only processes a subset - this is the costly approach to avoid ## Additional Resources {#additional-resources}