Skip to content

Commit 44acb87

Browse files
dor-forer and meiravgri authored
Generalize Micro-benchmarks [MOD-10160] (#703)
* add array and index * add docs fix "$BM_TYPE" = "bm-updated-fp32-single" * unify Memory benchmarks function to take index type parameter: Replace Memory_FLAT/HNSW/Tiered with single Memory function taking IndexTypeIndex to reduce code duplication. replace INDEX_TYPE_SVS_QUANTIZED with INDEX_TYPE_SVS_COMPRESSED * refactor: improve index ownership management in benchmarks - Replaced manual reference counting with `IndexPtr` for safer and simpler ownership management. - Introduced `IndexPtr` as a smart pointer wrapper for `VecSimIndex` to: 1. Ensure proper cleanup using RAII principles. 2. Enable safe ownership transfer via `release_ownership()`. 3. Support shared ownership with thread pool using `get_shared()`. introduce GET_INDEX to access an index in the indices array refactor: improve benchmark ownership and initialization logic - Ensured one-time initialization using `is_initialized` flags. * RunTopK_HNSW: remove unused is_tiered rename IndexTypeFlags entries from INDEX_TYPE to INDEX_MASK initialize threadpool mock only if needed * initialize bf after hnsw * fix mock thread pool deletion * introduce DEFAULT_BM_INDEXES_MASK that defines bf, hnsw and tiered address CR --------- Co-authored-by: meiravgri <[email protected]>
1 parent b1245ff commit 44acb87

40 files changed

+420
-323
lines changed

tests/benchmark/bm_batch_iterator.h

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ void BM_BatchIterator<index_type_t>::RunBatchedSearch_HNSW(
4444
benchmark::State &st, std::atomic_int &correct, size_t iter, size_t num_batches,
4545
size_t batch_size, size_t &total_res_num, size_t batch_increase_factor, size_t index_memory,
4646
double &memory_delta) {
47-
VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(
48-
INDICES.at(VecSimAlgo_HNSWLIB), QUERIES[iter % N_QUERIES].data(), nullptr);
47+
VecSimBatchIterator *batchIterator =
48+
VecSimBatchIterator_New(GET_INDEX(INDEX_HNSW), QUERIES[iter % N_QUERIES].data(), nullptr);
4949
VecSimQueryReply *accumulated_results[num_batches];
5050
size_t batch_num = 0;
5151
total_res_num = 0;
@@ -62,13 +62,13 @@ void BM_BatchIterator<index_type_t>::RunBatchedSearch_HNSW(
6262
}
6363
st.PauseTiming();
6464
// Update the memory delta as a result of using the batch iterator.
65-
size_t curr_memory = VecSimIndex_StatsInfo(INDICES.at(VecSimAlgo_HNSWLIB)).memory;
65+
size_t curr_memory = VecSimIndex_StatsInfo(GET_INDEX(INDEX_HNSW)).memory;
6666
memory_delta += (double)(curr_memory - index_memory);
6767
VecSimBatchIterator_Free(batchIterator);
6868

6969
// Measure recall - compare every result that was collected in some batch to the BF results.
70-
auto bf_results = VecSimIndex_TopKQuery(
71-
INDICES[VecSimAlgo_BF], QUERIES[iter % N_QUERIES].data(), total_res_num, nullptr, BY_SCORE);
70+
auto bf_results = VecSimIndex_TopKQuery(GET_INDEX(INDEX_BF), QUERIES[iter % N_QUERIES].data(),
71+
total_res_num, nullptr, BY_SCORE);
7272
for (size_t i = 0; i < batch_num; i++) {
7373
auto hnsw_results = accumulated_results[i];
7474
BM_VecSimGeneral::MeasureRecall(hnsw_results, bf_results, correct);
@@ -83,12 +83,12 @@ void BM_BatchIterator<index_type_t>::BF_FixedBatchSize(benchmark::State &st) {
8383
size_t batch_size = st.range(0);
8484
size_t num_batches = st.range(1);
8585
size_t iter = 0;
86-
size_t index_memory = VecSimIndex_StatsInfo(INDICES[VecSimAlgo_BF]).memory;
86+
size_t index_memory = VecSimIndex_StatsInfo(GET_INDEX(INDEX_BF)).memory;
8787
double memory_delta = 0.0;
8888

8989
for (auto _ : st) {
90-
VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(
91-
INDICES[VecSimAlgo_BF], QUERIES[iter % N_QUERIES].data(), nullptr);
90+
VecSimBatchIterator *batchIterator =
91+
VecSimBatchIterator_New(GET_INDEX(INDEX_BF), QUERIES[iter % N_QUERIES].data(), nullptr);
9292
size_t batches_counter = 0;
9393
while (VecSimBatchIterator_HasNext(batchIterator)) {
9494
VecSimQueryReply *res = VecSimBatchIterator_Next(batchIterator, batch_size, BY_ID);
@@ -98,7 +98,7 @@ void BM_BatchIterator<index_type_t>::BF_FixedBatchSize(benchmark::State &st) {
9898
break;
9999
}
100100
}
101-
size_t curr_memory = VecSimIndex_StatsInfo(INDICES[VecSimAlgo_BF]).memory;
101+
size_t curr_memory = VecSimIndex_StatsInfo(GET_INDEX(INDEX_BF)).memory;
102102
memory_delta += (double)(curr_memory - index_memory);
103103
VecSimBatchIterator_Free(batchIterator);
104104
iter++;
@@ -112,8 +112,8 @@ void BM_BatchIterator<index_type_t>::BF_VariableBatchSize(benchmark::State &st)
112112
size_t num_batches = st.range(1);
113113
size_t iter = 0;
114114
for (auto _ : st) {
115-
VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(
116-
INDICES[VecSimAlgo_BF], QUERIES[iter % N_QUERIES].data(), nullptr);
115+
VecSimBatchIterator *batchIterator =
116+
VecSimBatchIterator_New(GET_INDEX(INDEX_BF), QUERIES[iter % N_QUERIES].data(), nullptr);
117117
size_t batches_counter = 0;
118118
while (VecSimBatchIterator_HasNext(batchIterator)) {
119119
VecSimQueryReply *res = VecSimBatchIterator_Next(batchIterator, batch_size, BY_ID);
@@ -136,8 +136,8 @@ void BM_BatchIterator<index_type_t>::BF_BatchesToAdhocBF(benchmark::State &st) {
136136
size_t batch_size = 10;
137137
size_t iter = 0;
138138
for (auto _ : st) {
139-
VecSimBatchIterator *batchIterator = VecSimBatchIterator_New(
140-
INDICES[VecSimAlgo_BF], QUERIES[iter % N_QUERIES].data(), nullptr);
139+
VecSimBatchIterator *batchIterator =
140+
VecSimBatchIterator_New(GET_INDEX(INDEX_BF), QUERIES[iter % N_QUERIES].data(), nullptr);
141141
size_t batches_counter = 0;
142142
while (VecSimBatchIterator_HasNext(batchIterator)) {
143143
if (batches_counter == num_batches) {
@@ -151,7 +151,7 @@ void BM_BatchIterator<index_type_t>::BF_BatchesToAdhocBF(benchmark::State &st) {
151151
VecSimBatchIterator_Free(batchIterator);
152152
// Switch to ad-hoc BF
153153
for (size_t i = 0; i < N_VECTORS; i += step) {
154-
VecSimIndex_GetDistanceFrom_Unsafe(INDICES[VecSimAlgo_BF], i,
154+
VecSimIndex_GetDistanceFrom_Unsafe(GET_INDEX(INDEX_BF), i,
155155
QUERIES[iter % N_QUERIES].data());
156156
}
157157
iter++;
@@ -166,7 +166,7 @@ void BM_BatchIterator<index_type_t>::HNSW_FixedBatchSize(benchmark::State &st) {
166166
size_t total_res_num = num_batches * batch_size;
167167
size_t iter = 0;
168168
std::atomic_int correct = 0;
169-
size_t index_memory = VecSimIndex_StatsInfo(INDICES[VecSimAlgo_HNSWLIB]).memory;
169+
size_t index_memory = VecSimIndex_StatsInfo(GET_INDEX(INDEX_HNSW)).memory;
170170
double memory_delta = 0.0;
171171

172172
for (auto _ : st) {
@@ -185,7 +185,7 @@ void BM_BatchIterator<index_type_t>::HNSW_VariableBatchSize(benchmark::State &st
185185
size_t total_res_num;
186186
size_t iter = 0;
187187
std::atomic_int correct = 0;
188-
size_t index_memory = VecSimIndex_StatsInfo(INDICES[VecSimAlgo_HNSWLIB]).memory;
188+
size_t index_memory = VecSimIndex_StatsInfo(GET_INDEX(INDEX_HNSW)).memory;
189189
double memory_delta = 0.0;
190190

191191
for (auto _ : st) {
@@ -204,15 +204,15 @@ void BM_BatchIterator<index_type_t>::HNSW_BatchesToAdhocBF(benchmark::State &st)
204204
size_t total_res_num;
205205
size_t iter = 0;
206206
std::atomic_int correct = 0;
207-
size_t index_memory = VecSimIndex_StatsInfo(INDICES[VecSimAlgo_HNSWLIB]).memory;
207+
size_t index_memory = VecSimIndex_StatsInfo(GET_INDEX(INDEX_HNSW)).memory;
208208
double memory_delta = 0.0;
209209

210210
for (auto _ : st) {
211211
RunBatchedSearch_HNSW(st, correct, iter, num_batches, 10, total_res_num, 2, index_memory,
212212
memory_delta);
213213
// Switch to ad-hoc BF
214214
for (size_t i = 0; i < N_VECTORS; i += step) {
215-
VecSimIndex_GetDistanceFrom_Unsafe(INDICES[VecSimAlgo_HNSWLIB], i,
215+
VecSimIndex_GetDistanceFrom_Unsafe(GET_INDEX(INDEX_HNSW), i,
216216
QUERIES[iter % N_QUERIES].data());
217217
}
218218
iter++;

tests/benchmark/bm_common.h

Lines changed: 21 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,10 @@ class BM_VecSimCommon : public BM_VecSimIndex<index_type_t> {
2323
BM_VecSimCommon() = default;
2424
~BM_VecSimCommon() = default;
2525

26+
// index_offset: Offset added to base index types to access variants (0=original, 1=updated)
27+
2628
static void RunTopK_HNSW(benchmark::State &st, size_t ef, size_t iter, size_t k,
27-
std::atomic_int &correct, unsigned short index_offset = 0,
28-
bool is_tiered = false);
29+
std::atomic_int &correct, unsigned short index_offset = 0);
2930

3031
// Search for the K closest vectors to the query in the index. K is defined in the
3132
// test registration (initialization file).
@@ -36,24 +37,22 @@ class BM_VecSimCommon : public BM_VecSimIndex<index_type_t> {
3637
static void TopK_Tiered(benchmark::State &st, unsigned short index_offset = 0);
3738

3839
// Does nothing but returning the index memory.
39-
static void Memory_FLAT(benchmark::State &st, unsigned short index_offset = 0);
40-
static void Memory_HNSW(benchmark::State &st, unsigned short index_offset = 0);
41-
static void Memory_Tiered(benchmark::State &st, unsigned short index_offset = 0);
40+
static void Memory(benchmark::State &st, IndexTypeIndex index_type);
4241
};
4342

4443
template <typename index_type_t>
4544
void BM_VecSimCommon<index_type_t>::RunTopK_HNSW(benchmark::State &st, size_t ef, size_t iter,
4645
size_t k, std::atomic_int &correct,
47-
unsigned short index_offset, bool is_tiered) {
46+
unsigned short index_offset) {
4847
HNSWRuntimeParams hnswRuntimeParams = {.efRuntime = ef};
4948
auto query_params = BM_VecSimGeneral::CreateQueryParams(hnswRuntimeParams);
50-
auto hnsw_results = VecSimIndex_TopKQuery(
51-
INDICES[is_tiered ? VecSimAlgo_TIERED : VecSimAlgo_HNSWLIB + index_offset],
52-
QUERIES[iter % N_QUERIES].data(), k, &query_params, BY_SCORE);
49+
auto hnsw_results =
50+
VecSimIndex_TopKQuery(GET_INDEX(INDEX_HNSW + index_offset),
51+
QUERIES[iter % N_QUERIES].data(), k, &query_params, BY_SCORE);
5352
st.PauseTiming();
5453

5554
// Measure recall:
56-
auto bf_results = VecSimIndex_TopKQuery(INDICES[VecSimAlgo_BF + index_offset],
55+
auto bf_results = VecSimIndex_TopKQuery(GET_INDEX(INDEX_BF + index_offset),
5756
QUERIES[iter % N_QUERIES].data(), k, nullptr, BY_SCORE);
5857

5958
BM_VecSimGeneral::MeasureRecall(hnsw_results, bf_results, correct);
@@ -64,35 +63,15 @@ void BM_VecSimCommon<index_type_t>::RunTopK_HNSW(benchmark::State &st, size_t ef
6463
}
6564

6665
template <typename index_type_t>
67-
void BM_VecSimCommon<index_type_t>::Memory_FLAT(benchmark::State &st, unsigned short index_offset) {
68-
auto index = INDICES[VecSimAlgo_BF + index_offset];
69-
index->fitMemory();
70-
71-
for (auto _ : st) {
72-
// Do nothing...
73-
}
74-
st.counters["memory"] = (double)VecSimIndex_StatsInfo(index).memory;
75-
}
76-
template <typename index_type_t>
77-
void BM_VecSimCommon<index_type_t>::Memory_HNSW(benchmark::State &st, unsigned short index_offset) {
78-
auto index = INDICES[VecSimAlgo_HNSWLIB + index_offset];
66+
void BM_VecSimCommon<index_type_t>::Memory(benchmark::State &st, IndexTypeIndex index_type) {
67+
auto index = GET_INDEX(index_type);
7968
index->fitMemory();
8069

8170
for (auto _ : st) {
8271
// Do nothing...
8372
}
8473
st.counters["memory"] = (double)VecSimIndex_StatsInfo(index).memory;
8574
}
86-
template <typename index_type_t>
87-
void BM_VecSimCommon<index_type_t>::Memory_Tiered(benchmark::State &st,
88-
unsigned short index_offset) {
89-
auto index = INDICES[VecSimAlgo_TIERED + index_offset];
90-
index->fitMemory();
91-
for (auto _ : st) {
92-
// Do nothing...
93-
}
94-
st.counters["memory"] = (double)VecSimIndex_StatsInfo(index).memory;
95-
}
9675

9776
// TopK search BM
9877

@@ -101,8 +80,8 @@ void BM_VecSimCommon<index_type_t>::TopK_BF(benchmark::State &st, unsigned short
10180
size_t k = st.range(0);
10281
size_t iter = 0;
10382
for (auto _ : st) {
104-
VecSimIndex_TopKQuery(INDICES[VecSimAlgo_BF + index_offset],
105-
QUERIES[iter % N_QUERIES].data(), k, nullptr, BY_SCORE);
83+
VecSimIndex_TopKQuery(GET_INDEX(INDEX_BF + index_offset), QUERIES[iter % N_QUERIES].data(),
84+
k, nullptr, BY_SCORE);
10685
iter++;
10786
}
10887
}
@@ -126,8 +105,8 @@ void BM_VecSimCommon<index_type_t>::TopK_Tiered(benchmark::State &st, unsigned s
126105
size_t k = st.range(1);
127106
std::atomic_int correct = 0;
128107
std::atomic_int iter = 0;
129-
auto *tiered_index =
130-
dynamic_cast<TieredHNSWIndex<data_t, dist_t> *>(INDICES[VecSimAlgo_TIERED]);
108+
auto tiered_index =
109+
dynamic_cast<TieredHNSWIndex<data_t, dist_t> *>(GET_INDEX(INDEX_TIERED_HNSW));
131110
size_t total_iters = 50;
132111
VecSimQueryReply *all_results[total_iters];
133112

@@ -136,9 +115,9 @@ void BM_VecSimCommon<index_type_t>::TopK_Tiered(benchmark::State &st, unsigned s
136115
HNSWRuntimeParams hnswRuntimeParams = {.efRuntime = search_job->ef};
137116
auto query_params = BM_VecSimGeneral::CreateQueryParams(hnswRuntimeParams);
138117
size_t cur_iter = search_job->iter;
139-
auto hnsw_results =
140-
VecSimIndex_TopKQuery(INDICES[VecSimAlgo_TIERED], QUERIES[cur_iter % N_QUERIES].data(),
141-
search_job->k, &query_params, BY_SCORE);
118+
auto hnsw_results = VecSimIndex_TopKQuery(GET_INDEX(INDEX_TIERED_HNSW),
119+
QUERIES[cur_iter % N_QUERIES].data(),
120+
search_job->k, &query_params, BY_SCORE);
142121
search_job->all_results[cur_iter] = hnsw_results;
143122
delete job;
144123
};
@@ -149,14 +128,14 @@ void BM_VecSimCommon<index_type_t>::TopK_Tiered(benchmark::State &st, unsigned s
149128
tiered_index, k, ef, iter++, all_results);
150129
tiered_index->submitSingleJob(search_job);
151130
if (iter == total_iters) {
152-
BM_VecSimGeneral::mock_thread_pool.thread_pool_wait();
131+
BM_VecSimGeneral::mock_thread_pool->thread_pool_wait();
153132
}
154133
}
155134

156135
// Measure recall
157136
for (iter = 0; iter < total_iters; iter++) {
158137
auto bf_results =
159-
VecSimIndex_TopKQuery(INDICES[VecSimAlgo_BF + index_offset],
138+
VecSimIndex_TopKQuery(GET_INDEX(INDEX_BF + index_offset),
160139
QUERIES[iter % N_QUERIES].data(), k, nullptr, BY_SCORE);
161140
BM_VecSimGeneral::MeasureRecall(all_results[iter], bf_results, correct);
162141

@@ -165,7 +144,7 @@ void BM_VecSimCommon<index_type_t>::TopK_Tiered(benchmark::State &st, unsigned s
165144
}
166145

167146
st.counters["Recall"] = (float)correct / (float)(k * iter);
168-
st.counters["num_threads"] = (double)BM_VecSimGeneral::mock_thread_pool.thread_pool_size;
147+
st.counters["num_threads"] = (double)BM_VecSimGeneral::mock_thread_pool->thread_pool_size;
169148
}
170149

171150
#define REGISTER_TopK_BF(BM_CLASS, BM_FUNC) \

tests/benchmark/bm_definitions.h

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,80 @@ struct IndexType {
2121
typedef DistType dist_t;
2222
};
2323

24+
// Array indices for accessing different index types in the indices array
25+
// Note: Updated variants are offset by 1 from their base types (e.g., INDEX_BF_UPDATED = INDEX_BF +
26+
// 1)
27+
enum IndexTypeIndex {
28+
INDEX_BF = 0,
29+
INDEX_BF_UPDATED,
30+
INDEX_HNSW,
31+
INDEX_HNSW_UPDATED,
32+
INDEX_TIERED_HNSW,
33+
INDEX_SVS,
34+
INDEX_TIERED_SVS,
35+
INDEX_SVS_QUANTIZED,
36+
NUMBER_OF_INDEX_TYPES // Keep last
37+
};
38+
39+
// Bit flags for selectively enabling index types in benchmarks via
40+
// BM_VecSimGeneral::enabled_index_types bitmask. Limited to 32 index types because IndexTypeFlags is
41+
// stored in a 32-bit mask (uint32_t)
42+
// Note: Bit positions currently match IndexTypeIndex values but this is not required by the code
43+
enum IndexTypeFlags {
44+
INDEX_MASK_BF = 1 << 0,
45+
INDEX_MASK_BF_UPDATED = 1 << 1,
46+
INDEX_MASK_HNSW = 1 << 2,
47+
INDEX_MASK_HNSW_UPDATED = 1 << 3,
48+
INDEX_MASK_TIERED_HNSW = 1 << 4,
49+
INDEX_MASK_SVS = 1 << 5,
50+
INDEX_MASK_TIERED_SVS = 1 << 6,
51+
INDEX_MASK_SVS_COMPRESSED = 1 << 7
52+
};
53+
54+
// Smart pointer wrapper for VecSimIndex with configurable ownership
55+
// Supports:
56+
// 1. Ownership control via release_ownership()
57+
// 2. Sharing with thread pool's shared_ptr
58+
// 3. Safe transfer of ownership to tiered index
59+
class IndexPtr {
60+
private:
61+
std::shared_ptr<VecSimIndex> ptr;
62+
std::shared_ptr<bool> owns_ptr;
63+
64+
public:
65+
// Default constructor - creates empty pointer with ownership
66+
IndexPtr() : ptr(nullptr), owns_ptr(std::make_shared<bool>(true)) {}
67+
68+
// Constructor - always starts with ownership
69+
explicit IndexPtr(VecSimIndex *p) : owns_ptr(std::make_shared<bool>(true)) {
70+
if (p) {
71+
ptr = std::shared_ptr<VecSimIndex>(p, [owns_ptr = this->owns_ptr](VecSimIndex *p) {
72+
if (*owns_ptr) {
73+
VecSimIndex_Free(p);
74+
}
75+
});
76+
}
77+
}
78+
79+
// Prevent copying to ensure clear ownership
80+
IndexPtr(const IndexPtr &) = delete;
81+
IndexPtr &operator=(const IndexPtr &) = delete;
82+
83+
// Allow moving
84+
IndexPtr(IndexPtr &&) = default;
85+
IndexPtr &operator=(IndexPtr &&) = default;
86+
87+
// Access methods
88+
VecSimIndex *get() const { return ptr.get(); }
89+
std::shared_ptr<VecSimIndex> get_shared() { return ptr; }
90+
91+
// Implicit conversion to raw pointer for ease of use
92+
operator VecSimIndex *() const { return ptr.get(); }
93+
94+
// Ownership control
95+
void release_ownership() { *owns_ptr = false; }
96+
};
97+
2498
using fp32_index_t = IndexType<VecSimType_FLOAT32, float, float>;
2599
using fp64_index_t = IndexType<VecSimType_FLOAT64, double, double>;
26100
using bf16_index_t = IndexType<VecSimType_BFLOAT16, vecsim_types::bfloat16, float>;
@@ -29,9 +103,13 @@ using int8_index_t = IndexType<VecSimType_INT8, int8_t, float>;
29103
using uint8_index_t = IndexType<VecSimType_UINT8, uint8_t, float>;
30104

31105
#define INDICES BM_VecSimIndex<index_type_t>::indices
106+
#define GET_INDEX BM_VecSimIndex<index_type_t>::get_index
32107
#define QUERIES BM_VecSimIndex<index_type_t>::queries
33108
#define N_QUERIES BM_VecSimGeneral::n_queries
34109
#define N_VECTORS BM_VecSimGeneral::n_vectors
35110
#define DIM BM_VecSimGeneral::dim
36111
#define IS_MULTI BM_VecSimGeneral::is_multi
37-
#define REF_COUNT BM_VecSimIndex<index_type_t>::ref_count
112+
113+
constexpr uint32_t DEFAULT_BM_INDEXES_MASK = IndexTypeFlags::INDEX_MASK_BF |
114+
IndexTypeFlags::INDEX_MASK_HNSW |
115+
IndexTypeFlags::INDEX_MASK_TIERED_HNSW;

0 commit comments

Comments
 (0)