Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,15 @@ public List<Column> currentSchema() {
final List<CommandGenerator.CommandDescription> previousCommands = new ArrayList<>();
QueryExecuted previousResult;
};
EsqlQueryGenerator.generatePipeline(MAX_DEPTH, sourceCommand(), mappingInfo, exec, requiresTimeSeries(), this);
EsqlQueryGenerator.generatePipeline(
MAX_DEPTH,
sourceCommand(),
EsqlQueryGenerator.PIPE_COMMANDS,
mappingInfo,
exec,
requiresTimeSeries(),
this
);
}
}

Expand All @@ -182,7 +190,8 @@ private static CommandGenerator.ValidationResult checkResults(
previousResult == null ? null : previousResult.outputSchema(),
previousResult == null ? null : previousResult.result(),
result.outputSchema(),
result.result()
result.result(),
false
);
if (outputValidation.success() == false) {
for (Pattern allowedError : ALLOWED_ERROR_PATTERNS) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.elasticsearch.xpack.esql.generator.command.pipe.TimeSeriesStatsGenerator;
import org.elasticsearch.xpack.esql.generator.command.pipe.WhereGenerator;
import org.elasticsearch.xpack.esql.generator.command.source.FromGenerator;
import org.elasticsearch.xpack.esql.generator.command.source.SimpleFromGenerator;
import org.elasticsearch.xpack.esql.generator.command.source.TimeSeriesGenerator;

import java.util.List;
Expand All @@ -48,7 +49,9 @@ public class EsqlQueryGenerator {
/**
* These are commands that are at the beginning of the query, eg. FROM
*/
static List<CommandGenerator> SOURCE_COMMANDS = List.of(FromGenerator.INSTANCE);
public static List<CommandGenerator> SOURCE_COMMANDS = List.of(FromGenerator.INSTANCE);

public static List<CommandGenerator> SIMPLIFIED_SOURCE_COMMANDS = List.of(SimpleFromGenerator.INSTANCE);

/**
* Commands at the beginning of queries that begin queries on time series indices, eg. TS
Expand All @@ -58,7 +61,7 @@ public class EsqlQueryGenerator {
/**
* These are downstream commands, ie. that cannot appear as the first command in a query
*/
static List<CommandGenerator> PIPE_COMMANDS = List.of(
public static List<CommandGenerator> PIPE_COMMANDS = List.of(
ChangePointGenerator.INSTANCE,
DissectGenerator.INSTANCE,
DropGenerator.INSTANCE,
Expand All @@ -76,6 +79,25 @@ public class EsqlQueryGenerator {
WhereGenerator.INSTANCE
);

/**
* Same as PIPE_COMMANDS, but without the more complex commands (Fork, Enrich, Join).
* This is needed in CSV tests, which don't support the full ES capabilities.
*/
public static List<CommandGenerator> SIMPLIFIED_PIPE_COMMANDS = List.of(
ChangePointGenerator.INSTANCE,
DissectGenerator.INSTANCE,
DropGenerator.INSTANCE,
EvalGenerator.INSTANCE,
GrokGenerator.INSTANCE,
KeepGenerator.INSTANCE,
LimitGenerator.INSTANCE,
MvExpandGenerator.INSTANCE,
RenameGenerator.INSTANCE,
SortGenerator.INSTANCE,
StatsGenerator.INSTANCE,
WhereGenerator.INSTANCE
);

static List<CommandGenerator> TIME_SERIES_PIPE_COMMANDS = Stream.concat(
PIPE_COMMANDS.stream(),
Stream.of(TimeSeriesStatsGenerator.INSTANCE)
Expand All @@ -85,6 +107,10 @@ public static CommandGenerator sourceCommand() {
return randomFrom(SOURCE_COMMANDS);
}

/**
 * Picks one generator at random from {@code SIMPLIFIED_SOURCE_COMMANDS}.
 *
 * @return the randomly selected simplified source command generator
 */
public static CommandGenerator simplifiedSourceCommand() {
    final CommandGenerator picked = randomFrom(SIMPLIFIED_SOURCE_COMMANDS);
    return picked;
}

/**
 * Picks one generator at random from {@code TIME_SERIES_SOURCE_COMMANDS}.
 *
 * @return the randomly selected time series source command generator
 */
public static CommandGenerator timeSeriesSourceCommand() {
    final CommandGenerator picked = randomFrom(TIME_SERIES_SOURCE_COMMANDS);
    return picked;
}
Expand All @@ -111,6 +137,7 @@ public interface Executor {
public static void generatePipeline(
final int depth,
CommandGenerator commandGenerator,
List<CommandGenerator> pipelineGenerators,
final CommandGenerator.QuerySchema schema,
Executor executor,
boolean isTimeSeries,
Expand All @@ -131,7 +158,7 @@ public static void generatePipeline(
while (commandAllowed == false) {
commandGenerator = isTimeSeries && canGenerateTimeSeries
? randomMetricsPipeCommandGenerator()
: randomPipeCommandGenerator();
: randomFrom(pipelineGenerators);
if (isTimeSeries == false) {
commandAllowed = true;
} else {
Expand All @@ -149,6 +176,7 @@ public static void generatePipeline(
}
}
}

desc = commandGenerator.generate(executor.previousCommands(), executor.currentSchema(), schema, queryExecutor);
if (desc == CommandGenerator.EMPTY_DESCRIPTION) {
continue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ All you have to do is:
*** Have a look at `EsqlQueryGenerator`, it contains many utility methods that will help you generate random expressions.
** Implement `CommandGenerator.validateOutput()` to validate the output of the query.
* Add your class to `EsqlQueryGenerator.SOURCE_COMMANDS` (if it's a source command) or `EsqlQueryGenerator.PIPE_COMMANDS` (if it's a pipe command).
These will be used by `GenerativeIT` to pick a random command to append to the query.
** Also consider adding your generators (or a simplified version of them)
to `EsqlQueryGenerator.SIMPLIFIED_SOURCE_COMMANDS` or `EsqlQueryGenerator.SIMPLIFIED_PIPE_COMMANDS`.
These are used to generate queries in contexts where the full complexity of ES|QL is not supported (e.g. in `GenerativeCsvIT`).
* Run `GenerativeIT` at least a couple of times: these tests can be pretty noisy.
* If you get unexpected errors (real bugs in ES|QL), please open an issue and add the error to `GenerativeRestTest.ALLOWED_ERRORS`. Run tests again until everything works fine.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
return VALIDATION_OK;
}
Expand All @@ -71,7 +72,8 @@ public ValidationResult validateOutput(
* @param schema The columns returned by the query so far. It contains name and type information for each column.
* @param executor
* @return All the details about the generated command. See {@link CommandDescription}.
* If something goes wrong and for some reason you can't generate a command, you should return {@link CommandGenerator#EMPTY_DESCRIPTION}
* If something goes wrong and for some reason you can't generate a command,
* you should return {@link CommandGenerator#EMPTY_DESCRIPTION}
*/
CommandDescription generate(
List<CommandDescription> previousCommands,
Expand All @@ -89,11 +91,16 @@ CommandDescription generate(
* It also contains the context information you stored during command generation.
* @param previousColumns The output schema of the original query (without last generated command).
* It contains name and type information for each column, see {@link Column}
* @param previousOutput The output of the original query (without last generated command), as a list (rows) of lists (columns) of values
* @param previousOutput The output of the original query (without last generated command),
* as a list (rows) of lists (columns) of values
* @param columns The output schema of the full query (WITH last generated command).
* @param output The output of the full query (WITH last generated command), as a list (rows) of lists (columns) of values
* @param deterministic True if the query is executed in deterministic mode (eg. in CsvTests), ie. the
* results (including their order) are stable across multiple executions.
* False if the query is executed in non-deterministic mode (eg. in GenerativeIT, against an ES cluster)
* @return The result of the output validation. If the validation succeeds, you should return {@link CommandGenerator#VALIDATION_OK}.
* Also, if for some reason you can't validate the output, just return {@link CommandGenerator#VALIDATION_OK}; for a command, having a generator without
* Also, if for some reason you can't validate the output, just return {@link CommandGenerator#VALIDATION_OK};
* for a command, having a generator without
* validation is much better than having no generator at all.
*/
ValidationResult validateOutput(
Expand All @@ -102,20 +109,39 @@ ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
);

/**
 * Checks that the last command did not change the number of rows.
 * The comparison is only performed when {@code deterministic} is true; otherwise the
 * row counts are not compared and the validation always succeeds.
 *
 * @param previousCommands the commands generated so far (not read by this check)
 * @param previousOutput the rows returned by the query without the last command
 * @param output the rows returned by the query with the last command
 * @param deterministic whether the row count is expected to be stable, and therefore comparable
 * @return a failed {@link ValidationResult} reporting both row counts if they differ in deterministic mode,
 * {@code VALIDATION_OK} otherwise
 */
static ValidationResult expectSameRowCount(
List<CommandDescription> previousCommands,
List<List<Object>> previousOutput,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
// Row counts are only compared in deterministic mode.
if (deterministic && previousOutput.size() != output.size()) {
return new ValidationResult(false, "Expecting [" + previousOutput.size() + "] rows, got [" + output.size() + "]");
}

return VALIDATION_OK;
}

// ES|QL is quite non-deterministic in this sense, we can't guarantee it for now
// if (output.size() != previousOutput.size()) {
// return new ValidationResult(false, "Expecting [" + previousOutput.size() + "] rows, but got [" + output.size() + "]");
// }
/**
 * Compares one column of two result sets, row by row and in order.
 *
 * @param before the reference rows
 * @param beforeCol index of the column to read in each row of {@code before}
 * @param after the rows to check
 * @param afterCol index of the column to read in each row of {@code after}
 * @return a failed {@link ValidationResult} if the row counts differ or if any pair of values differs
 * (nulls only match nulls), {@code VALIDATION_OK} otherwise
 */
static ValidationResult expectSameData(List<List<Object>> before, int beforeCol, List<List<Object>> after, int afterCol) {
    final int rowCount = before.size();
    if (rowCount != after.size()) {
        return new ValidationResult(false, "Expecting same number of rows, got [" + before.size() + "] and [" + after.size() + "]");
    }

    for (int row = 0; row < rowCount; row++) {
        final Object expected = before.get(row).get(beforeCol);
        final Object actual = after.get(row).get(afterCol);

        if (expected != null) {
            // Non-null reference value: the other side must be equal to it.
            if (expected.equals(actual)) {
                continue;
            }
            return new ValidationResult(false, "Expecting [" + expected + "] at row [" + row + "], got [" + actual + "]");
        }

        // Null reference value: the other side must be null as well.
        if (actual != null) {
            return new ValidationResult(false, "Expecting null at row [" + row + "], got [" + actual + "]");
        }
    }
    return VALIDATION_OK;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
return CommandGenerator.expectAtLeastSameNumberOfColumns(previousColumns, columns);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
if (commandDescription == EMPTY_DESCRIPTION) {
return VALIDATION_OK;
Expand All @@ -76,6 +77,6 @@ public ValidationResult validateOutput(
return new ValidationResult(false, "Expecting at least [" + previousColumns.size() + "] columns, got [" + columns.size() + "]");
}

return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output);
return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output, deterministic);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
if (commandDescription == EMPTY_DESCRIPTION) {
return VALIDATION_OK;
Expand All @@ -86,8 +87,17 @@ public ValidationResult validateOutput(
return new ValidationResult(false, "Column [" + droppedColumn + "] was not dropped");
}
}
// TODO awaits fix https://github.com/elastic/elasticsearch/issues/120272
// return CommandGenerator.expectSameRowCount(previousOutput, output);

// In deterministic mode, every column that survives the DROP must also exist in the previous
// output and carry exactly the same values, row by row.
if (deterministic) {
    for (int columnIdx = 0; columnIdx < columns.size(); columnIdx++) {
        Column c = columns.get(columnIdx);
        int previousColumnIdx = previousColumns.indexOf(c);
        if (previousColumnIdx == -1) {
            return new ValidationResult(false, "Column [" + c + "] not in previous output");
        }
        // Propagate a data mismatch; previously the result of this check was silently discarded.
        ValidationResult sameData = CommandGenerator.expectSameData(previousOutput, previousColumnIdx, output, columnIdx);
        if (sameData.success() == false) {
            return sameData;
        }
    }
}
return VALIDATION_OK;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
if (commandDescription == EMPTY_DESCRIPTION) {
return VALIDATION_OK;
Expand All @@ -59,6 +60,6 @@ public ValidationResult validateOutput(
return new ValidationResult(false, "Expecting at least [" + previousColumns.size() + "] columns, got [" + columns.size() + "]");
}

return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output);
return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output, deterministic);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
List<String> expectedColumns = (List<String>) commandDescription.context().get(NEW_COLUMNS);
List<String> resultColNames = columns.stream().map(Column::name).toList();
Expand All @@ -87,7 +88,7 @@ public ValidationResult validateOutput(
);
}

return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output);
return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output, deterministic);
}

private static String unquote(String colName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,14 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
return VALIDATION_OK;
}
};

EsqlQueryGenerator.generatePipeline(3, gen, schema, exec, false, executor);
EsqlQueryGenerator.generatePipeline(3, gen, EsqlQueryGenerator.PIPE_COMMANDS, schema, exec, false, executor);
if (exec.previousCommands().size() > 1) {
String previousCmd = exec.previousCommands()
.stream()
Expand All @@ -143,8 +144,9 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output);
return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output, deterministic);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
if (commandDescription == EMPTY_DESCRIPTION) {
return VALIDATION_OK;
}
if (previousColumns.size() > columns.size()) {
return new ValidationResult(false, "Expecting at least [" + previousColumns.size() + "] columns, got [" + columns.size() + "]");
}
return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output);
return CommandGenerator.expectSameRowCount(previousCommands, previousOutput, output, deterministic);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ public ValidationResult validateOutput(
List<Column> previousColumns,
List<List<Object>> previousOutput,
List<Column> columns,
List<List<Object>> output
List<List<Object>> output,
boolean deterministic
) {
if (commandDescription == EMPTY_DESCRIPTION) {
return VALIDATION_OK;
Expand All @@ -79,6 +80,16 @@ public ValidationResult validateOutput(
return new ValidationResult(false, "Expecting at most [" + previousColumns.size() + "] columns, got [" + columns.size() + "]");
}

// In deterministic mode, every column retained by KEEP must also exist in the previous
// output and carry exactly the same values, row by row.
if (deterministic) {
    for (int columnIdx = 0; columnIdx < columns.size(); columnIdx++) {
        Column c = columns.get(columnIdx);
        int previousColumnIdx = previousColumns.indexOf(c);
        if (previousColumnIdx == -1) {
            return new ValidationResult(false, "Column [" + c + "] not in previous output");
        }
        // Propagate a data mismatch; previously the result of this check was silently discarded.
        ValidationResult sameData = CommandGenerator.expectSameData(previousOutput, previousColumnIdx, output, columnIdx);
        if (sameData.success() == false) {
            return sameData;
        }
    }
}
return VALIDATION_OK;
}

Expand Down
Loading