Skip to content
Open
8 changes: 5 additions & 3 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ http://s.apache.org/luceneversions

API Changes
---------------------
* GITHUB#15324: Fix MaxScoreBulkScorer so that it never calls TermScorer with docID >= maxDoc, which caused an EOFException on norms access. (kdt523)

* GITHUB#15215: Switch to Java 25 as the minimum required platform. Upgrade to gradle 9.1.0.
(Robert Muir, Kaival Parikh, Dawid Weiss)

Expand Down Expand Up @@ -98,6 +100,9 @@ Bug Fixes

* GITHUB#15125: Handle inconsistent schema on flush with index sorts (Nhat Nguyen)

* GITHUB#15343: Ensure that `AcceptDocs#cost()` only ever calls `BitSets#cardinality()`
once per instance to avoid redundant computation. (Ben Trent)

Changes in Runtime Behavior
---------------------
* GITHUB#14187: The query cache is now disabled by default. (Adrien Grand)
Expand Down Expand Up @@ -204,11 +209,8 @@ Optimizations

* GITHUB#15343: Ensure that `AcceptDocs#cost()` only ever calls `BitSets#cardinality()`
once per instance to avoid redundant computation. (Ben Trent)

* GITHUB#14963: Bypass HNSW graph building for tiny segments. (Shubham Chaudhary, Ben Trent)

Bug Fixes
---------------------
* GITHUB#14161: PointInSetQuery's constructor now throws IllegalArgumentException
instead of UnsupportedOperationException when values are out of order. (Shubham Sharma)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,11 +89,15 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
// Then within these outer windows, it creates inner windows of size WINDOW_SIZE that help
// collect matches into a bitset and save the overhead of rebalancing the priority queue on
// every match.
// Never iterate beyond this leaf's maxDoc to avoid scoring invalid doc IDs.
final int loopMax = Math.min(max, maxDoc);

int outerWindowMin = min;
outer:
while (outerWindowMin < max) {
while (outerWindowMin < loopMax) {
int outerWindowMax = computeOuterWindowMax(outerWindowMin);
outerWindowMax = Math.min(outerWindowMax, max);
// Cap outer window by loopMax (which itself is <= maxDoc)
outerWindowMax = Math.min(outerWindowMax, loopMax);

while (true) {
updateMaxWindowScores(outerWindowMin, outerWindowMax);
Expand Down Expand Up @@ -178,7 +182,9 @@ private void scoreInnerWindowWithFilter(
// Only score an inner window, after that we'll check if the min competitive score has increased
// enough for a more favorable partitioning to be used.
int innerWindowMin = top.doc;
int innerWindowMax = MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE);
// Ensure innerWindowMax never exceeds maxDoc
int innerWindowMax =
Math.min(maxDoc, MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE));

docAndScoreAccBuffer.size = 0;
while (top.doc < innerWindowMax) {
Expand Down Expand Up @@ -241,7 +247,8 @@ private void scoreInnerWindowMultipleEssentialClauses(
DisiWrapper top = essentialQueue.top();

int innerWindowMin = top.doc;
int innerWindowMax = MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE);
int innerWindowMax =
Math.min(maxDoc, MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE));
int innerWindowSize = innerWindowMax - innerWindowMin;

// Collect matches of essential clauses into a bitset
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
// ...existing code...
import org.apache.lucene.tests.util.LuceneTestCase;

/**
 * Regression test for a bug where MaxScoreBulkScorer could score past the leaf's maxDoc when a
 * restrictive filter and a disjunction were used together.
 */
public class TestMaxScoreBulkScorerFilterBounds extends LuceneTestCase {

  /**
   * Indexes 200 documents, then runs a two-clause disjunction combined with a highly selective
   * FILTER clause and checks that the search completes without throwing and returns the expected
   * hits.
   *
   * @throws Exception if indexing or searching fails
   */
  public void testFilteredDisjunctionDoesNotScorePastMaxDoc() throws Exception {
    // The directory is owned by try-with-resources so that it is closed even when indexing or
    // opening the reader throws.
    try (Directory dir = newDirectory()) {
      try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig())) {
        // Create a small index where one clause matches more docs than the other, plus a
        // restrictive filter.
        for (int i = 0; i < 200; i++) {
          Document d = new Document();
          // Clause A matches ~1/3 of the docs
          d.add(new StringField("a", (i % 3 == 0) ? "yes" : "no", Field.Store.NO));
          // Clause B matches ~1/9 of the docs
          d.add(new StringField("b", (i % 9 == 0) ? "yes" : "no", Field.Store.NO));
          // Restrictive filter matches ~1% of the docs (i == 0 and i == 100)
          d.add(new StringField("f", (i % 100 == 0) ? "on" : "off", Field.Store.NO));
          w.addDocument(d);
        }
      }

      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);

        Query disjunction =
            new BooleanQuery.Builder()
                .add(new TermQuery(new Term("a", "yes")), BooleanClause.Occur.SHOULD)
                .add(new TermQuery(new Term("b", "yes")), BooleanClause.Occur.SHOULD)
                .build();

        Query filter = new TermQuery(new Term("f", "on"));

        Query filtered =
            new BooleanQuery.Builder()
                .add(disjunction, BooleanClause.Occur.SHOULD)
                .add(filter, BooleanClause.Occur.FILTER)
                .build();

        // Intended to exercise the top-scores collection path; the primary check is that the
        // search completes without throwing.
        TopDocs td = searcher.search(filtered, 10);
        assertNotNull(td);
        // Only the FILTER clause is required (the SHOULD clause is optional next to it), and
        // exactly two of the 200 indexed docs carry f=on.
        assertEquals(2, td.scoreDocs.length);
      }
    }
  }
}
Loading