Skip to content

Commit e28de98

Browse files
Samiul-TheSoccerFan, elasticmachine, elasticsearchmachine
authored
Configurable Inference timeout during Query time (#131551)
* introducing timeout as cluster settings * forcing null to be sent instead of default value * applying timeout in infer level * removing unused variable * adding unit tests for cluster timeout values * fix linting issues * Update docs/changelog/131551.yaml * update changelog * fix ml core SparseVectorQueryBuilder unit test * adding comment and Nullable annotation * adding restriction to make sure the cluster setting is only read during search operation * Refactored timeout logic per input type and added unit tests * fix unit test failure due to missing inferenceStat variable * update comment for timeout * remove the timeout util file * resolve timeout from Service Utils and moved unit tests to service util * update comment for timeout * removed duplicate setting * update inference plugin and utils to streamline settings registration * using mockClusterService in all services * adding min value * Adding tests for provided timeout to work as expected * simplify inference timeout settings * [CI] Auto commit changes from spotless * added better async handling in the test and simplify response * revert back ingest timeout and simplify unit tests * remove redundant code * fix unnecessary instance creation --------- Co-authored-by: Elastic Machine <[email protected]> Co-authored-by: elasticsearchmachine <[email protected]>
1 parent d3a07f7 commit e28de98

File tree

18 files changed

+348
-38
lines changed

18 files changed

+348
-38
lines changed

docs/changelog/131551.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 131551
2+
summary: Added support to configure query timeout for inference
3+
area: Inference
4+
type: enhancement
5+
issues: []

server/src/main/java/org/elasticsearch/inference/InferenceService.java

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,9 @@ default boolean hideFromConfigurationApi() {
108108
* @param stream Stream inference results
109109
* @param taskSettings Settings in the request to override the model's defaults
110110
* @param inputType For search, ingest etc
111-
* @param timeout The timeout for the request
111+
* @param timeout The timeout for the request. Callers should normally pass in a timeout.
112+
* Passing in null is specifically for query-time inference, when the timeout is managed by the
113+
* xpack.inference.query_timeout cluster setting.
112114
* @param listener Inference result listener
113115
*/
114116
void infer(
@@ -120,7 +122,7 @@ void infer(
120122
boolean stream,
121123
Map<String, Object> taskSettings,
122124
InputType inputType,
123-
TimeValue timeout,
125+
@Nullable TimeValue timeout,
124126
ActionListener<InferenceServiceResults> listener
125127
);
126128

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilder.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import org.elasticsearch.xcontent.XContentBuilder;
3232
import org.elasticsearch.xcontent.XContentParser;
3333
import org.elasticsearch.xpack.core.ml.action.CoordinatedInferenceAction;
34-
import org.elasticsearch.xpack.core.ml.action.InferModelAction;
3534
import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings;
3635
import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults;
3736
import org.elasticsearch.xpack.core.ml.inference.results.WarningInferenceResults;
@@ -279,7 +278,7 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) {
279278
List.of(query),
280279
TextExpansionConfigUpdate.EMPTY_UPDATE,
281280
false,
282-
InferModelAction.Request.DEFAULT_TIMEOUT_FOR_API
281+
null
283282
);
284283
inferRequest.setHighPriority(true);
285284
inferRequest.setPrefixType(TrainedModelPrefixStrings.PrefixType.SEARCH);

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/vectors/TextEmbeddingQueryVectorBuilder.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import org.elasticsearch.xcontent.XContentBuilder;
2020
import org.elasticsearch.xcontent.XContentParser;
2121
import org.elasticsearch.xpack.core.ml.action.CoordinatedInferenceAction;
22-
import org.elasticsearch.xpack.core.ml.action.InferModelAction;
2322
import org.elasticsearch.xpack.core.ml.inference.TrainedModelConfig;
2423
import org.elasticsearch.xpack.core.ml.inference.TrainedModelPrefixStrings;
2524
import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults;
@@ -116,7 +115,7 @@ public void buildVector(Client client, ActionListener<float[]> listener) {
116115
List.of(modelText),
117116
TextEmbeddingConfigUpdate.EMPTY_INSTANCE,
118117
false,
119-
InferModelAction.Request.DEFAULT_TIMEOUT_FOR_API
118+
null
120119
);
121120

122121
inferRequest.setHighPriority(true);

x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/ml/search/SparseVectorQueryBuilderTests.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ protected boolean canSimulateMethod(Method method, Object[] args) throws NoSuchM
124124
@Override
125125
protected Object simulateMethod(Method method, Object[] args) {
126126
CoordinatedInferenceAction.Request request = (CoordinatedInferenceAction.Request) args[1];
127-
assertEquals(InferModelAction.Request.DEFAULT_TIMEOUT_FOR_API, request.getInferenceTimeout());
127+
assertNull(request.getInferenceTimeout());
128128
assertEquals(TrainedModelPrefixStrings.PrefixType.SEARCH, request.getPrefixType());
129129
assertEquals(CoordinatedInferenceAction.Request.RequestModelType.NLP_MODEL, request.getRequestModelType());
130130

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/InferencePlugin.java

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,8 @@
145145

146146
import java.util.ArrayList;
147147
import java.util.Collection;
148+
import java.util.Collections;
149+
import java.util.HashSet;
148150
import java.util.List;
149151
import java.util.Map;
150152
import java.util.Set;
@@ -180,6 +182,13 @@ public class InferencePlugin extends Plugin
180182
Setting.Property.NodeScope,
181183
Setting.Property.Dynamic
182184
);
185+
public static final Setting<TimeValue> INFERENCE_QUERY_TIMEOUT = Setting.timeSetting(
186+
"xpack.inference.query_timeout",
187+
TimeValue.timeValueSeconds(10),
188+
TimeValue.timeValueMillis(1),
189+
Setting.Property.NodeScope,
190+
Setting.Property.Dynamic
191+
);
183192

184193
public static final LicensedFeature.Momentary INFERENCE_API_FEATURE = LicensedFeature.momentary(
185194
"inference",
@@ -490,7 +499,11 @@ public static ExecutorBuilder<?> inferenceUtilityExecutor(Settings settings) {
490499

491500
@Override
492501
public List<Setting<?>> getSettings() {
493-
ArrayList<Setting<?>> settings = new ArrayList<>();
502+
return List.copyOf(getInferenceSettings());
503+
}
504+
505+
public static Set<Setting<?>> getInferenceSettings() {
506+
Set<Setting<?>> settings = new HashSet<>();
494507
settings.addAll(HttpSettings.getSettingsDefinitions());
495508
settings.addAll(HttpClientManager.getSettingsDefinitions());
496509
settings.addAll(ThrottlerManager.getSettingsDefinitions());
@@ -499,9 +512,9 @@ public List<Setting<?>> getSettings() {
499512
settings.addAll(RequestExecutorServiceSettings.getSettingsDefinitions());
500513
settings.add(SKIP_VALIDATE_AND_START);
501514
settings.add(INDICES_INFERENCE_BATCH_SIZE);
515+
settings.add(INFERENCE_QUERY_TIMEOUT);
502516
settings.addAll(ElasticInferenceServiceSettings.getSettingsDefinitions());
503-
504-
return settings;
517+
return Collections.unmodifiableSet(settings);
505518
}
506519

507520
@Override

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/queries/SemanticQueryBuilder.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
import org.elasticsearch.xcontent.XContentBuilder;
3232
import org.elasticsearch.xcontent.XContentParser;
3333
import org.elasticsearch.xpack.core.inference.action.InferenceAction;
34-
import org.elasticsearch.xpack.core.ml.action.InferModelAction;
3534
import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults;
3635
import org.elasticsearch.xpack.core.ml.inference.results.MlTextEmbeddingResults;
3736
import org.elasticsearch.xpack.core.ml.inference.results.TextExpansionResults;
@@ -237,7 +236,7 @@ private SemanticQueryBuilder doRewriteGetInferenceResults(QueryRewriteContext qu
237236
List.of(query),
238237
Map.of(),
239238
InputType.INTERNAL_SEARCH,
240-
InferModelAction.Request.DEFAULT_TIMEOUT_FOR_API,
239+
null,
241240
false
242241
);
243242

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/SenderService.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,10 @@ public void infer(
7070
boolean stream,
7171
Map<String, Object> taskSettings,
7272
InputType inputType,
73-
TimeValue timeout,
73+
@Nullable TimeValue timeout,
7474
ActionListener<InferenceServiceResults> listener
7575
) {
76+
timeout = ServiceUtils.resolveInferenceTimeout(timeout, inputType, clusterService);
7677
init();
7778
var chunkInferenceInput = input.stream().map(i -> new ChunkInferenceInput(i, null)).toList();
7879
var inferenceInput = createInput(this, model, chunkInferenceInput, inputType, query, returnDocuments, topN, stream);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/ServiceUtils.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
import org.elasticsearch.ElasticsearchStatusException;
1111
import org.elasticsearch.action.ActionRequestValidationException;
12+
import org.elasticsearch.cluster.service.ClusterService;
1213
import org.elasticsearch.common.ValidationException;
1314
import org.elasticsearch.common.settings.SecureString;
1415
import org.elasticsearch.core.Nullable;
@@ -21,7 +22,9 @@
2122
import org.elasticsearch.inference.SimilarityMeasure;
2223
import org.elasticsearch.inference.TaskType;
2324
import org.elasticsearch.rest.RestStatus;
25+
import org.elasticsearch.xpack.core.inference.action.InferenceAction;
2426
import org.elasticsearch.xpack.core.ml.inference.assignment.AdaptiveAllocationsSettings;
27+
import org.elasticsearch.xpack.inference.InferencePlugin;
2528
import org.elasticsearch.xpack.inference.services.settings.ApiKeySecrets;
2629

2730
import java.net.URI;
@@ -1098,5 +1101,24 @@ public static void checkByteBounds(short value) {
10981101
}
10991102
}
11001103

1104+
/**
1105+
* Resolves the inference timeout based on input type and cluster settings.
1106+
*
1107+
* @param timeout The provided timeout value, may be null
1108+
* @param inputType The input type for the inference request
1109+
* @param clusterService The cluster service to get timeout settings from
1110+
* @return The resolved timeout value
1111+
*/
1112+
public static TimeValue resolveInferenceTimeout(@Nullable TimeValue timeout, InputType inputType, ClusterService clusterService) {
1113+
if (timeout == null) {
1114+
if (inputType == InputType.SEARCH || inputType == InputType.INTERNAL_SEARCH) {
1115+
return clusterService.getClusterSettings().get(InferencePlugin.INFERENCE_QUERY_TIMEOUT);
1116+
} else {
1117+
return InferenceAction.Request.DEFAULT_TIMEOUT;
1118+
}
1119+
}
1120+
return timeout;
1121+
}
1122+
11011123
private ServiceUtils() {}
11021124
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/elasticsearch/BaseElasticsearchInternalService.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,10 @@ private void preferredVariantFromPlatformArchitecture(ActionListener<PreferredMo
299299
);
300300
}
301301

302+
protected ClusterService getClusterService() {
303+
return clusterService;
304+
}
305+
302306
boolean isClusterInElasticCloud() {
303307
// Use the ml lazy node count as a heuristic to determine if in Elastic cloud.
304308
// A value > 0 means scaling should be available for ml nodes

0 commit comments

Comments
 (0)