Skip to content

Commit cbb5495

Browse files
CNDB-13192: Bump the default cassandra.sai.latest.version from DC to EC so we support BM25 by default. (#1737)
We do not need to update JVECTOR_VERSION post CNDB-14301
1 parent d41c2a8 commit cbb5495

File tree

3 files changed

+22
-15
lines changed

3 files changed

+22
-15
lines changed

src/java/org/apache/cassandra/config/CassandraRelevantProperties.java

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -358,7 +358,7 @@ public enum CassandraRelevantProperties
358358
CUSTOM_SSTABLE_WATCHER("cassandra.custom_sstable_watcher"),
359359

360360
/** The current version of the SAI on-disk index format. */
361-
SAI_CURRENT_VERSION("cassandra.sai.latest.version", "dc"),
361+
SAI_CURRENT_VERSION("cassandra.sai.latest.version", "ec"),
362362

363363
/** Controls the maximum top-k limit for vector search */
364364
SAI_VECTOR_SEARCH_MAX_TOP_K("cassandra.sai.vector_search.max_top_k", "1000"),

test/unit/org/apache/cassandra/index/sai/disk/v1/InvertedIndexSearcherTest.java

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@
2424
import java.util.stream.Collectors;
2525

2626
import org.agrona.collections.Int2IntHashMap;
27+
import org.apache.cassandra.index.sai.*;
28+
import org.junit.Before;
2729
import org.junit.BeforeClass;
2830
import org.junit.Test;
2931

@@ -32,10 +34,6 @@
3234
import org.apache.cassandra.cql3.Operator;
3335
import org.apache.cassandra.db.marshal.UTF8Type;
3436
import org.apache.cassandra.dht.Murmur3Partitioner;
35-
import org.apache.cassandra.index.sai.IndexContext;
36-
import org.apache.cassandra.index.sai.QueryContext;
37-
import org.apache.cassandra.index.sai.SAITester;
38-
import org.apache.cassandra.index.sai.SSTableContext;
3937
import org.apache.cassandra.index.sai.disk.MemtableTermsIterator;
4038
import org.apache.cassandra.index.sai.disk.format.IndexComponents;
4139
import org.apache.cassandra.index.sai.disk.format.IndexDescriptor;
@@ -61,6 +59,8 @@ public class InvertedIndexSearcherTest extends SaiRandomizedTest
6159
// Use a shared index context to prevent creating too many metrics unnecessarily
6260
private final IndexContext indexContext = SAITester.createIndexContext("meh", UTF8Type.instance);
6361

62+
private final Version version;
63+
6464
@ParametersFactory()
6565
public static Collection<Object[]> data()
6666
{
@@ -70,7 +70,11 @@ public static Collection<Object[]> data()
7070
return Version.ALL.stream().map(v -> new Object[]{v}).collect(Collectors.toList());
7171
}
7272

73-
private final Version version;
73+
@Before
74+
public void setCurrentSAIVersion()
75+
{
76+
SAIUtil.setCurrentVersion(version);
77+
}
7478

7579
public InvertedIndexSearcherTest(Version version)
7680
{
@@ -214,7 +218,7 @@ private IndexSearcher buildIndexAndOpenSearcher(int terms, List<InvertedIndexBui
214218

215219
private List<InvertedIndexBuilder.TermsEnum> buildTermsEnum(Version version, int terms, int postings)
216220
{
217-
return InvertedIndexBuilder.buildStringTermsEnum(version, terms, postings, () -> randomSimpleString(3, 5), () -> nextInt(0, Integer.MAX_VALUE));
221+
return InvertedIndexBuilder.buildStringTermsEnum(version, terms, postings, () -> randomSimpleString(3, 5), () -> nextInt(0, terms * postings * 2));
218222
}
219223

220224
private Int2IntHashMap createMockDocLengths(List<InvertedIndexBuilder.TermsEnum> termsEnum)

test/unit/org/apache/cassandra/index/sai/disk/v1/TermsReaderTest.java

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,11 @@
1818
package org.apache.cassandra.index.sai.disk.v1;
1919

2020
import java.io.IOException;
21-
import java.util.ArrayList;
2221
import java.util.Collection;
23-
import java.util.HashMap;
2422
import java.util.List;
25-
import java.util.Map;
2623
import java.util.stream.Collectors;
2724

25+
import org.junit.Before;
2826
import org.junit.Test;
2927

3028
import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
@@ -37,7 +35,6 @@
3735
import org.apache.cassandra.index.sai.SAIUtil;
3836
import org.apache.cassandra.index.sai.disk.MemtableTermsIterator;
3937
import org.apache.cassandra.index.sai.disk.PostingList;
40-
import org.apache.cassandra.index.sai.disk.RAMStringIndexer;
4138
import org.apache.cassandra.index.sai.disk.TermsIterator;
4239
import org.apache.cassandra.index.sai.disk.format.IndexComponentType;
4340
import org.apache.cassandra.index.sai.disk.format.IndexComponents;
@@ -50,11 +47,9 @@
5047
import org.apache.cassandra.index.sai.utils.SaiRandomizedTest;
5148
import org.apache.cassandra.index.sai.utils.TypeUtil;
5249
import org.apache.cassandra.io.util.FileHandle;
53-
import org.apache.cassandra.utils.ByteBufferUtil;
5450
import org.apache.cassandra.utils.Pair;
5551
import org.apache.cassandra.utils.bytecomparable.ByteComparable;
5652
import org.apache.cassandra.utils.bytecomparable.ByteSourceInverse;
57-
import org.apache.lucene.util.BytesRef;
5853

5954
import static org.apache.cassandra.index.sai.disk.v1.InvertedIndexBuilder.buildStringTermsEnum;
6055
import static org.apache.cassandra.index.sai.metrics.QueryEventListeners.NO_OP_TRIE_LISTENER;
@@ -63,6 +58,8 @@ public class TermsReaderTest extends SaiRandomizedTest
6358
{
6459
public static final ByteComparable.Version VERSION = TypeUtil.BYTE_COMPARABLE_VERSION;
6560

61+
private final Version version;
62+
6663
@ParametersFactory()
6764
public static Collection<Object[]> data()
6865
{
@@ -72,7 +69,11 @@ public static Collection<Object[]> data()
7269
return Version.ALL.stream().map(v -> new Object[]{v}).collect(Collectors.toList());
7370
}
7471

75-
private final Version version;
72+
@Before
73+
public void setCurrentSAIVersion()
74+
{
75+
SAIUtil.setCurrentVersion(version);
76+
}
7677

7778
public TermsReaderTest(Version version)
7879
{
@@ -239,6 +240,8 @@ private Int2IntHashMap createMockDocLengths(List<InvertedIndexBuilder.TermsEnum>
239240

240241
private List<InvertedIndexBuilder.TermsEnum> buildTermsEnum(Version version, int terms, int postings)
241242
{
242-
return buildStringTermsEnum(version, terms, postings, () -> randomSimpleString(4, 10), () -> nextInt(0, Integer.MAX_VALUE));
243+
// We use terms * postings * 2 as the upper bound on row ids used in the postings list to allow for a somewhat
244+
// sparse mapping but not one that is too sparse.
245+
return buildStringTermsEnum(version, terms, postings, () -> randomSimpleString(4, 10), () -> nextInt(0, terms * postings * 2));
243246
}
244247
}

0 commit comments

Comments
 (0)