From f4c4c7ff8efa8f87b85aa0a99ba1f63241006ccc Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Tue, 18 Nov 2025 04:36:30 +0000 Subject: [PATCH 1/2] Refactor analyze for PAX Refactor analyze logic for PAX like AOCO. --- .../src/cpp/access/pax_access_handle.cc | 138 ++++++++++++++++-- .../src/cpp/access/pax_access_handle.h | 2 + contrib/pax_storage/src/cpp/comm/cbdb_api.h | 2 + .../pax_storage/src/cpp/comm/cbdb_wrappers.cc | 12 ++ .../pax_storage/src/cpp/comm/cbdb_wrappers.h | 3 + src/backend/commands/analyze.c | 22 +-- 6 files changed, 147 insertions(+), 32 deletions(-) diff --git a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc index 12caf0e9f64..1bbc7428178 100644 --- a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc +++ b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc @@ -49,6 +49,7 @@ #include "storage/paxc_smgr.h" #include "storage/wal/pax_wal.h" #include "storage/wal/paxc_wal.h" +#include "storage/pax_itemptr.h" #define NOT_IMPLEMENTED_YET \ ereport(ERROR, \ @@ -64,6 +65,111 @@ // access methods that are implemented in C++ namespace pax { +struct AnalyzeBlockItem { + int block; + int row_count; + int64 start_sample_block; + int64 end_sample_block; +}; + +static std::vector extract_micro_partitions(Relation rel, Snapshot snapshot, int64 *totalrows) { + auto iter = pax::MicroPartitionIterator::New(rel, snapshot); + std::vector analyze_items; + int64 ntuples = 0; + while (iter->HasNext()) { + auto mp = iter->Next(); + AnalyzeBlockItem item; + item.block = mp.GetMicroPartitionId(); + item.row_count = mp.GetTupleCount(); + Assert(item.row_count > 0); + + ntuples += item.row_count; + analyze_items.emplace_back(item); + } + iter->Release(); + *totalrows = ntuples; + + std::sort(analyze_items.begin(), analyze_items.end(), + [](const AnalyzeBlockItem &a, const AnalyzeBlockItem &b) { + return a.block < b.block; + }); + return analyze_items; +} + +static std::vector +extract_sample_items(Relation rel, Snapshot snapshot, int64 *totalrows) { + std::vector analyze_items; + analyze_items = extract_micro_partitions(rel, snapshot, totalrows); + + int64 row_index = 0; + for (size_t i = 0; i < analyze_items.size(); i++) { + auto &item = analyze_items[i]; + item.start_sample_block = row_index; + item.end_sample_block = row_index + item.row_count; + row_index = item.end_sample_block; + } + + return analyze_items; +} + +static int pax_acquire_sample_rows(Relation onerel, Snapshot snapshot, + HeapTuple *rows, int targrows, + double *totalrows, double *totaldeadrows) { + std::vector analyze_items; + int64 ntuples = 0; + analyze_items = extract_sample_items(onerel, snapshot, &ntuples); + + TupleTableSlot *slot = cbdb::MakeSingleTupleTableSlot( + RelationGetDescr(onerel), table_slot_callbacks(onerel)); + + // start sample rows + RowSamplerData rs; + size_t analyze_item_index = 0; + int numrows = 0; + double liverows = 0; + double deadrows = 0; + + PaxIndexScanDesc desc(onerel); + RowSampler_Init(&rs, ntuples, targrows, random()); + while (RowSampler_HasMore(&rs)) { + int64 sample_row = RowSampler_Next(&rs); + cbdb::VacuumDelayPoint(); + + // seek to the corresponding analyze item + while (analyze_item_index < analyze_items.size() && + sample_row >= analyze_items[analyze_item_index].end_sample_block) { + analyze_item_index++; + } + if (analyze_item_index == analyze_items.size()) { + break; + } + + const auto &item = analyze_items[analyze_item_index]; + Assert(sample_row >= item.start_sample_block && + sample_row < item.end_sample_block); + Assert(sample_row - item.start_sample_block < item.row_count); + + int offset = static_cast(sample_row - item.start_sample_block); + + ItemPointerData ctid = pax::MakeCTID(item.block, offset); + + bool ok = desc.FetchTuple(&ctid, snapshot, slot, nullptr, nullptr); + if (ok) { + liverows += 1; + rows[numrows++] = cbdb::ExecCopyHeapTuple(slot); + } else { + // dead rows + deadrows += 1; + } + cbdb::ExecClearTuple(slot); + } + *totaldeadrows = deadrows / rs.m * (double) ntuples; + *totalrows = ntuples - *totaldeadrows; + desc.Release(); + cbdb::ExecDropSingleTupleTableSlot(slot); + + return numrows; +} TableScanDesc CCPaxAccessMethod::ScanBegin(Relation relation, Snapshot snapshot, int nkeys, struct ScanKeyData *key, @@ -285,33 +391,34 @@ TM_Result CCPaxAccessMethod::TupleUpdate(Relation relation, ItemPointer otid, pg_unreachable(); } -bool CCPaxAccessMethod::ScanAnalyzeNextBlock(TableScanDesc scan, - BlockNumber blockno, - BufferAccessStrategy bstrategy) { +int CCPaxAccessMethod::AcquireSampleRows(Relation onerel, int elevel, HeapTuple *rows, + int targrows, double *totalrows, double *totaldeadrows) { + auto snapshot = GetCatalogSnapshot(InvalidOid); CBDB_TRY(); { - auto desc = PaxScanDesc::ToDesc(scan); - return desc->ScanAnalyzeNextBlock(blockno, bstrategy); + return pax_acquire_sample_rows(onerel, snapshot, rows, targrows, + totalrows, totaldeadrows); } CBDB_CATCH_DEFAULT(); - CBDB_FINALLY({}); CBDB_END_TRY(); pg_unreachable(); } +bool CCPaxAccessMethod::ScanAnalyzeNextBlock(TableScanDesc scan, + BlockNumber blockno, + BufferAccessStrategy bstrategy) { + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("analyze next block is not supported on pax relations"))); +} + bool CCPaxAccessMethod::ScanAnalyzeNextTuple(TableScanDesc scan, TransactionId oldest_xmin, double *liverows, double *deadrows, TupleTableSlot *slot) { - CBDB_TRY(); - { - auto desc = PaxScanDesc::ToDesc(scan); - return desc->ScanAnalyzeNextTuple(oldest_xmin, liverows, deadrows, slot); - } - CBDB_CATCH_DEFAULT(); - CBDB_FINALLY({}); - CBDB_END_TRY(); - pg_unreachable(); + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("analyze next tuple is not supported on pax relations"))); } bool CCPaxAccessMethod::ScanBitmapNextBlock(TableScanDesc scan, @@ -770,6 +877,7 @@ static const TableAmRoutine kPaxColumnMethods = { .relation_vacuum = paxc::PaxAccessMethod::RelationVacuum, .scan_analyze_next_block = pax::CCPaxAccessMethod::ScanAnalyzeNextBlock, .scan_analyze_next_tuple = pax::CCPaxAccessMethod::ScanAnalyzeNextTuple, + .relation_acquire_sample_rows = pax::CCPaxAccessMethod::AcquireSampleRows, .index_build_range_scan = paxc::PaxAccessMethod::IndexBuildRangeScan, .index_validate_scan = paxc::PaxAccessMethod::IndexValidateScan, diff --git a/contrib/pax_storage/src/cpp/access/pax_access_handle.h b/contrib/pax_storage/src/cpp/access/pax_access_handle.h index 9ed827b4709..6897936b2eb 100644 --- a/contrib/pax_storage/src/cpp/access/pax_access_handle.h +++ b/contrib/pax_storage/src/cpp/access/pax_access_handle.h @@ -171,6 +171,8 @@ class CCPaxAccessMethod final { static void RelationNontransactionalTruncate(Relation rel); + static int AcquireSampleRows(Relation onerel, int elevel, HeapTuple *rows, + int targrows, double *totalrows, double *totaldeadrows); static bool ScanAnalyzeNextBlock(TableScanDesc scan, BlockNumber blockno, BufferAccessStrategy bstrategy); static bool ScanAnalyzeNextTuple(TableScanDesc scan, diff --git a/contrib/pax_storage/src/cpp/comm/cbdb_api.h b/contrib/pax_storage/src/cpp/comm/cbdb_api.h index 30db87143a1..b5c0a928c27 100644 --- a/contrib/pax_storage/src/cpp/comm/cbdb_api.h +++ b/contrib/pax_storage/src/cpp/comm/cbdb_api.h @@ -79,6 +79,7 @@ extern "C" { #include "commands/defrem.h" #include "commands/progress.h" #include "commands/tablecmds.h" +#include "commands/vacuum.h" #include "funcapi.h" #include "miscadmin.h" #include "nodes/bitmapset.h" @@ -135,6 +136,7 @@ extern "C" { #include "utils/memutils.h" #include "utils/numeric.h" #include "utils/relcache.h" +#include "utils/sampling.h" #include "utils/snapshot.h" #include "utils/spccache.h" #include "utils/syscache.h" diff --git a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc index 3e54e965698..a86c1dd2fa6 100644 --- a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc +++ b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.cc @@ -611,6 +611,12 @@ TupleTableSlot *cbdb::MakeSingleTupleTableSlot( CBDB_WRAP_END; } +HeapTuple cbdb::ExecCopyHeapTuple(TupleTableSlot *slot) { + CBDB_WRAP_START; + { return ::ExecCopySlotHeapTuple(slot); } + CBDB_WRAP_END; +} + void cbdb::SlotGetAllAttrs(TupleTableSlot *slot) { CBDB_WRAP_START; { ::slot_getallattrs(slot); } @@ -628,3 +634,9 @@ void cbdb::ExecStoreVirtualTuple(TupleTableSlot *slot) { { ::ExecStoreVirtualTuple(slot); } CBDB_WRAP_END; } + +void cbdb::VacuumDelayPoint() { + CBDB_WRAP_START; + { vacuum_delay_point(); } + CBDB_WRAP_END; +} \ No newline at end of file diff --git a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h index 2031662357d..f6b1215d48a 100644 --- a/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h +++ b/contrib/pax_storage/src/cpp/comm/cbdb_wrappers.h @@ -285,6 +285,8 @@ bool NeedWAL(Relation rel); void ExecDropSingleTupleTableSlot(TupleTableSlot *slot); TupleTableSlot *MakeSingleTupleTableSlot(TupleDesc tupdesc, const TupleTableSlotOps *tts_ops); +HeapTuple ExecCopyHeapTuple(TupleTableSlot *slot); + void SlotGetAllAttrs(TupleTableSlot *slot); @@ -292,6 +294,7 @@ void ExecClearTuple(TupleTableSlot *slot); void ExecStoreVirtualTuple(TupleTableSlot *slot); +void VacuumDelayPoint(); } // namespace cbdb // clang-format off diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 6f5c4c5c42c..87d356a8a2b 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -1728,24 +1728,12 @@ acquire_sample_rows(Relation onerel, int elevel, * the relation should not be an AO/CO table. */ Assert(!RelationIsAppendOptimized(onerel)); - if (RelationIsPax(onerel)) - { - /* PAX use non-fixed block layout */ - BlockNumber pages; - double tuples; - double allvisfrac; - int32 attr_widths; - - table_relation_estimate_size(onerel, &attr_widths, &pages, - &tuples, &allvisfrac); - - if (tuples > UINT_MAX) - tuples = UINT_MAX; + /* + * PAX uses table_relation_acquire_sample_rows() as well. + */ + Assert(!RelationIsPax(onerel)); - totalblocks = (BlockNumber)tuples; - } - else - totalblocks = RelationGetNumberOfBlocks(onerel); + totalblocks = RelationGetNumberOfBlocks(onerel); /* Need a cutoff xmin for HeapTupleSatisfiesVacuum */ OldestXmin = GetOldestNonRemovableTransactionId(onerel); From a2ad4620618888d8e3f99953ff4461aa08ecbbea Mon Sep 17 00:00:00 2001 From: Hao Wu Date: Wed, 19 Nov 2025 03:57:24 +0000 Subject: [PATCH 2/2] fix value --- contrib/pax_storage/src/cpp/access/pax_access_handle.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc index 1bbc7428178..69ccf010b4a 100644 --- a/contrib/pax_storage/src/cpp/access/pax_access_handle.cc +++ b/contrib/pax_storage/src/cpp/access/pax_access_handle.cc @@ -163,8 +163,13 @@ static int pax_acquire_sample_rows(Relation onerel, Snapshot snapshot, } cbdb::ExecClearTuple(slot); } - *totaldeadrows = deadrows / rs.m * (double) ntuples; - *totalrows = ntuples - *totaldeadrows; + if (rs.m > 0) { + *totaldeadrows = deadrows / rs.m * (double) ntuples; + *totalrows = ntuples - *totaldeadrows; + } else { + *totalrows = 0.0; + *totaldeadrows = 0.0; + } desc.Release(); cbdb::ExecDropSingleTupleTableSlot(slot);