Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -819,15 +819,16 @@ public void testLookupExplosionNoFetchManyMatches() throws IOException {
}

/**
 * Calls {@code lookupExplosionBigString(sensorDataCount, lookupEntries)} and asserts that the
 * {@code "values"} entry of the returned map is a single-element list holding a single-element
 * list whose value is {@code sensorDataCount * lookupEntries}.
 */
public void testLookupExplosionBigString() throws IOException {
// NOTE(review): this span is a flattened diff hunk, so both sides of the change are present;
// presumably 150 is the removed value and 500 is the added one - confirm against the real diff.
int sensorDataCount = 150;
int sensorDataCount = 500;
int lookupEntries = 1;
Map<?, ?> map = lookupExplosionBigString(sensorDataCount, lookupEntries);
// presumably extraOk() tolerates entries other than "values" in the response - TODO confirm
assertMap(map, matchesMap().extraOk().entry("values", List.of(List.of(sensorDataCount * lookupEntries))));
}

/**
 * Hands {@code assertCircuitBreaks} a function that runs {@code lookupExplosionBigString}
 * with a sensor data count scaled by the attempt number.
 * NOTE(review): presumably {@code assertCircuitBreaks} retries with a growing attempt
 * until a circuit breaker trips - confirm against its definition.
 */
public void testLookupExplosionBigStringManyMatches() throws IOException {
// NOTE(review): flattened diff hunk - the first comment/assert pair below is presumably the
// removed side and the second pair the added side; confirm against the real diff.
// 500, 1 is enough to make it fail locally but some CI needs more
assertCircuitBreaks(attempt -> lookupExplosionBigString(attempt * 500, 1));
// 500, 1 is enough with a single node, but the serverless copy of this test uses many nodes.
// So something like 5000, 10 is much more of a sure thing there.
assertCircuitBreaks(attempt -> lookupExplosionBigString(attempt * 5000, 10));
}

private Map<String, Object> lookupExplosion(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;

Expand Down Expand Up @@ -145,7 +145,7 @@ public enum DataType {
* Fields of this type are unsupported by any functions and are always
* rendered as {@code null} in the response.
*/
UNSUPPORTED(builder().typeName("UNSUPPORTED").unknownSize()),
UNSUPPORTED(builder().typeName("UNSUPPORTED").estimatedSize(1024)),
/**
* Fields that are always {@code null}, usually created with constant
* {@code null} values.
Expand Down Expand Up @@ -238,15 +238,15 @@ public enum DataType {
* Generally ESQL uses {@code keyword} fields as raw strings. So things like
* {@code TO_STRING} will make a {@code keyword} field.
*/
KEYWORD(builder().esType("keyword").unknownSize().docValues()),
KEYWORD(builder().esType("keyword").estimatedSize(50).docValues()),
/**
* String fields that are analyzed when the document is received and may be
* cut into more than one token. Generally ESQL only sees {@code text} fields
* when loaded from the index and ESQL will load these fields
* <strong>without</strong> analysis. The {@code MATCH} operator can be used
* to query these fields with analysis.
*/
TEXT(builder().esType("text").unknownSize()),
TEXT(builder().esType("text").estimatedSize(1024)),
/**
* Millisecond precision date, stored as a 64-bit signed number.
*/
Expand All @@ -267,8 +267,8 @@ public enum DataType {
*/
// 8.15.2-SNAPSHOT is 15 bytes, most are shorter, some can be longer
VERSION(builder().esType("version").estimatedSize(15).docValues()),
OBJECT(builder().esType("object").unknownSize()),
SOURCE(builder().esType(SourceFieldMapper.NAME).unknownSize()),
OBJECT(builder().esType("object").estimatedSize(1024)),
SOURCE(builder().esType(SourceFieldMapper.NAME).estimatedSize(10 * 1024)),
DATE_PERIOD(builder().typeName("DATE_PERIOD").estimatedSize(3 * Integer.BYTES)),
TIME_DURATION(builder().typeName("TIME_DURATION").estimatedSize(Integer.BYTES + Long.BYTES)),
// WKB for points is typically 21 bytes.
Expand Down Expand Up @@ -298,20 +298,20 @@ public enum DataType {
* Every document in {@link IndexMode#TIME_SERIES} index will have a single value
* for this field and the segments themselves are sorted on this value.
*/
TSID_DATA_TYPE(builder().esType("_tsid").unknownSize().docValues()),
TSID_DATA_TYPE(builder().esType("_tsid").estimatedSize(Long.BYTES * 2).docValues()),
/**
* Fields with this type are the partial result of running a non-time-series aggregation
* alongside time-series aggregations. These fields are not parsable from the
* mapping and should be hidden from users.
*/
PARTIAL_AGG(builder().esType("partial_agg").unknownSize()),
PARTIAL_AGG(builder().esType("partial_agg").estimatedSize(1024)),

AGGREGATE_METRIC_DOUBLE(builder().esType("aggregate_metric_double").estimatedSize(Double.BYTES * 3 + Integer.BYTES)),

/**
* Fields with this type are dense vectors, represented as an array of double values.
*/
DENSE_VECTOR(builder().esType("dense_vector").unknownSize());
DENSE_VECTOR(builder().esType("dense_vector").estimatedSize(4096));

/**
* Types that are actively being built. These types are
Expand Down Expand Up @@ -341,7 +341,7 @@ public enum DataType {

private final String esType;

private final Optional<Integer> estimatedSize;
private final int estimatedSize;

/**
* True if the type represents a "whole number", as in, does <strong>not</strong> have a decimal part.
Expand Down Expand Up @@ -377,11 +377,10 @@ public enum DataType {

DataType(Builder builder) {
String typeString = builder.typeName != null ? builder.typeName : builder.esType;
assert builder.estimatedSize != null : "Missing size for type " + typeString;
this.typeName = typeString.toLowerCase(Locale.ROOT);
this.name = typeString.toUpperCase(Locale.ROOT);
this.esType = builder.esType;
this.estimatedSize = builder.estimatedSize;
this.estimatedSize = Objects.requireNonNull(builder.estimatedSize, "estimated size is required");
this.isWholeNumber = builder.isWholeNumber;
this.isRationalNumber = builder.isRationalNumber;
this.docValues = builder.docValues;
Expand Down Expand Up @@ -683,10 +682,21 @@ public boolean isNumeric() {
}

/**
* @return the estimated size, in bytes, of this data type. If there's no reasonable way to estimate the size,
* the optional will be empty.
* An estimate of the size of values of this type in a Block. Every type must have an
* estimate, and estimates should generally follow these rules:
* <ol>
* <li>
* If you know the precise size of a single element of this type, use that.
* For example {@link #INTEGER} uses {@link Integer#BYTES}.
* </li>
* <li>
* Overestimates are better than underestimates. Overestimates make operations
* less efficient, but underestimates cause circuit breaker errors.
* </li>
* </ol>
* @return the estimated size of this data type in bytes
*/
public Optional<Integer> estimatedSize() {
public int estimatedSize() {
return estimatedSize;
}

Expand Down Expand Up @@ -801,7 +811,7 @@ private static class Builder {

private String typeName;

private Optional<Integer> estimatedSize;
private Integer estimatedSize;

/**
* True if the type represents a "whole number", as in, does <strong>not</strong> have a decimal part.
Expand Down Expand Up @@ -848,13 +858,11 @@ Builder typeName(String typeName) {
return this;
}

/**
 * Records the estimated size, in bytes, of values of the type being built and
 * returns this builder so that calls can be chained.
 * See {@link DataType#estimatedSize}.
 */
Builder estimatedSize(int size) {
this.estimatedSize = Optional.of(size);
return this;
}

// NOTE(review): flattened diff hunk - presumably unknownSize() and both Optional assignments
// are the removed side, and "this.estimatedSize = size;" is the new body of estimatedSize(int);
// confirm against the real diff.
Builder unknownSize() {
this.estimatedSize = Optional.empty();
this.estimatedSize = size;
return this;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,6 @@ static int estimateSize(DataType dataType) {
if (elementType == ElementType.UNKNOWN) {
throw new EsqlIllegalArgumentException("[unknown] can't be the result of field extraction");
}
return dataType.estimatedSize().orElse(50);
return dataType.estimatedSize();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -459,8 +459,8 @@ public static DataType commonType(DataType left, DataType right) {
return KEYWORD;
}
if (left.isNumeric() && right.isNumeric()) {
int lsize = left.estimatedSize().orElseThrow();
int rsize = right.estimatedSize().orElseThrow();
int lsize = left.estimatedSize();
int rsize = right.estimatedSize();
// if one is int
if (left.isWholeNumber()) {
// promote the highest int
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8018,7 +8018,7 @@ public void testReductionPlanForLimit() {
Tuple<PhysicalPlan, PhysicalPlan> plans = PlannerUtils.breakPlanBetweenCoordinatorAndDataNode(plan, config);
PhysicalPlan reduction = PlannerUtils.reductionPlan(plans.v2());
LimitExec limitExec = as(reduction, LimitExec.class);
assertThat(limitExec.estimatedRowSize(), equalTo(328));
assertThat(limitExec.estimatedRowSize(), equalTo(2276));
}

public void testEqualsPushdownToDelegate() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ private static void commonNumericType(DataType numericType, List<DataType> lower
List<DataType> NUMERICS = Arrays.stream(DataType.values()).filter(DataType::isNumeric).toList();
List<DataType> DOUBLES = Arrays.stream(DataType.values()).filter(DataType::isRationalNumber).toList();
for (DataType dataType : DataType.values()) {
if (DOUBLES.containsAll(List.of(numericType, dataType)) && (dataType.estimatedSize().equals(numericType.estimatedSize()))) {
if (DOUBLES.containsAll(List.of(numericType, dataType)) && (dataType.estimatedSize() == numericType.estimatedSize())) {
assertEquals(numericType, commonType(dataType, numericType));
} else if (lowerTypes.contains(dataType)) {
assertEqualsCommonType(numericType, dataType, numericType);
Expand Down