Skip to content

Commit db25ef7

Browse files
committed
Filter rows without col split when calculate grand total
Signed-off-by: Yuanchun Shen <[email protected]>
1 parent 93e3d03 commit db25ef7

18 files changed

+303
-277
lines changed

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2094,11 +2094,14 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) {
20942094

20952095
// 1: column-split, 2: agg
20962096
relBuilder.project(relBuilder.field(1), relBuilder.field(2));
2097+
// Make sure that rows who don't have a column split not interfere grand total calculation
2098+
relBuilder.filter(relBuilder.isNotNull(relBuilder.field(0)));
2099+
final String GRAND_TOTAL_COL = "__grand_total__";
20972100
relBuilder.aggregate(
20982101
relBuilder.groupKey(relBuilder.field(0)),
20992102
buildAggCall(context.relBuilder, aggFunction, relBuilder.field(1))
2100-
.as("__grand_total__")); // results: group key, agg calls
2101-
RexNode grandTotal = relBuilder.field("__grand_total__");
2103+
.as(GRAND_TOTAL_COL)); // results: group key, agg calls
2104+
RexNode grandTotal = relBuilder.field(GRAND_TOTAL_COL);
21022105
// Apply sorting: for MIN/EARLIEST, reverse the top/bottom logic
21032106
boolean smallestFirst =
21042107
aggFunction == BuiltinFunctionName.MIN || aggFunction == BuiltinFunctionName.EARLIEST;
@@ -2108,6 +2111,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) {
21082111

21092112
// Always set it to null last so that it does not interfere with top / bottom calculation
21102113
grandTotal = relBuilder.nullsLast(grandTotal);
2114+
final String ROW_NUM_COL = "__row_number__";
21112115
RexNode rowNum =
21122116
PlanUtils.makeOver(
21132117
context,
@@ -2117,7 +2121,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) {
21172121
List.of(),
21182122
List.of(grandTotal),
21192123
WindowFrame.toCurrentRow());
2120-
relBuilder.projectPlus(relBuilder.alias(rowNum, "__row_number__"));
2124+
relBuilder.projectPlus(relBuilder.alias(rowNum, ROW_NUM_COL));
21212125
RelNode ranked = relBuilder.build();
21222126

21232127
relBuilder.push(aggregated);
@@ -2131,7 +2135,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) {
21312135
RexNode lteCondition =
21322136
relBuilder.call(
21332137
SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
2134-
relBuilder.field("__row_number__"),
2138+
relBuilder.field(ROW_NUM_COL),
21352139
relBuilder.literal(limit));
21362140
RexNode nullCondition = relBuilder.isNull(colSplitPostJoin);
21372141
RexNode columnSplitExpr;

integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java

Lines changed: 83 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import org.junit.runner.RunWith;
1111
import org.junit.runners.Suite;
1212
import org.opensearch.sql.calcite.remote.*;
13+
import org.opensearch.sql.calcite.tpch.CalcitePPLTpchIT;
1314
import org.opensearch.sql.ppl.PPLIntegTestCase;
1415

1516
/**
@@ -20,88 +21,88 @@
2021
@RunWith(Suite.class)
2122
@Suite.SuiteClasses({
2223
CalciteExplainIT.class,
23-
// CalciteArrayFunctionIT.class,
24-
// CalciteBinCommandIT.class,
25-
// CalciteConvertTZFunctionIT.class,
26-
// CalciteCsvFormatIT.class,
27-
// CalciteDataTypeIT.class,
28-
// CalciteDateTimeComparisonIT.class,
29-
// CalciteDateTimeFunctionIT.class,
30-
// CalciteDateTimeImplementationIT.class,
31-
// CalciteDedupCommandIT.class,
32-
// CalciteDescribeCommandIT.class,
33-
// CalciteExpandCommandIT.class,
34-
// CalciteFieldsCommandIT.class,
35-
// CalciteFillNullCommandIT.class,
36-
// CalciteFlattenCommandIT.class,
37-
// CalciteFlattenDocValueIT.class,
38-
// CalciteGeoIpFunctionsIT.class,
39-
// CalciteGeoPointFormatsIT.class,
40-
// CalciteHeadCommandIT.class,
41-
// CalciteInformationSchemaCommandIT.class,
42-
// CalciteIPComparisonIT.class,
43-
// CalciteIPFunctionsIT.class,
44-
// CalciteJsonFunctionsIT.class,
45-
// CalciteLegacyAPICompatibilityIT.class,
46-
// CalciteLikeQueryIT.class,
47-
// CalciteMathematicalFunctionIT.class,
48-
// CalciteMultisearchCommandIT.class,
49-
// CalciteMultiValueStatsIT.class,
50-
// CalciteNewAddedCommandsIT.class,
51-
// CalciteNowLikeFunctionIT.class,
52-
// CalciteObjectFieldOperateIT.class,
53-
// CalciteOperatorIT.class,
54-
// CalciteParseCommandIT.class,
55-
// CalcitePPLAggregationIT.class,
56-
// CalcitePPLAppendcolIT.class,
57-
// CalcitePPLAppendCommandIT.class,
58-
// CalcitePPLBasicIT.class,
59-
// CalcitePPLBuiltinDatetimeFunctionInvalidIT.class,
60-
// CalcitePPLBuiltinFunctionIT.class,
61-
// CalcitePPLBuiltinFunctionsNullIT.class,
62-
// CalcitePPLCaseFunctionIT.class,
63-
// CalcitePPLCastFunctionIT.class,
64-
// CalcitePPLConditionBuiltinFunctionIT.class,
65-
// CalcitePPLCryptographicFunctionIT.class,
66-
// CalcitePPLDedupIT.class,
67-
// CalcitePPLEventstatsIT.class,
68-
// CalcitePPLExistsSubqueryIT.class,
69-
// CalcitePPLExplainIT.class,
70-
// CalcitePPLFillnullIT.class,
71-
// CalcitePPLGrokIT.class,
72-
// CalcitePPLInSubqueryIT.class,
73-
// CalcitePPLIPFunctionIT.class,
74-
// CalcitePPLJoinIT.class,
75-
// CalcitePPLJsonBuiltinFunctionIT.class,
76-
// CalcitePPLLookupIT.class,
77-
// CalcitePPLParseIT.class,
78-
// CalcitePPLPatternsIT.class,
79-
// CalcitePPLPluginIT.class,
80-
// CalcitePPLRenameIT.class,
81-
// CalcitePPLScalarSubqueryIT.class,
82-
// CalcitePPLSortIT.class,
83-
// CalcitePPLStringBuiltinFunctionIT.class,
84-
// CalcitePPLTrendlineIT.class,
85-
// CalcitePrometheusDataSourceCommandsIT.class,
86-
// CalciteQueryAnalysisIT.class,
87-
// CalciteRareCommandIT.class,
88-
// CalciteRegexCommandIT.class,
89-
// CalciteRexCommandIT.class,
90-
// CalciteRenameCommandIT.class,
91-
// CalciteReplaceCommandIT.class,
92-
// CalciteResourceMonitorIT.class,
93-
// CalciteSearchCommandIT.class,
94-
// CalciteSettingsIT.class,
95-
// CalciteShowDataSourcesCommandIT.class,
96-
// CalciteSortCommandIT.class,
97-
// CalciteStatsCommandIT.class,
98-
// CalciteSystemFunctionIT.class,
99-
// CalciteTextFunctionIT.class,
100-
// CalciteTopCommandIT.class,
101-
// CalciteTrendlineCommandIT.class,
102-
// CalciteVisualizationFormatIT.class,
103-
// CalciteWhereCommandIT.class,
104-
// CalcitePPLTpchIT.class
24+
CalciteArrayFunctionIT.class,
25+
CalciteBinCommandIT.class,
26+
CalciteConvertTZFunctionIT.class,
27+
CalciteCsvFormatIT.class,
28+
CalciteDataTypeIT.class,
29+
CalciteDateTimeComparisonIT.class,
30+
CalciteDateTimeFunctionIT.class,
31+
CalciteDateTimeImplementationIT.class,
32+
CalciteDedupCommandIT.class,
33+
CalciteDescribeCommandIT.class,
34+
CalciteExpandCommandIT.class,
35+
CalciteFieldsCommandIT.class,
36+
CalciteFillNullCommandIT.class,
37+
CalciteFlattenCommandIT.class,
38+
CalciteFlattenDocValueIT.class,
39+
CalciteGeoIpFunctionsIT.class,
40+
CalciteGeoPointFormatsIT.class,
41+
CalciteHeadCommandIT.class,
42+
CalciteInformationSchemaCommandIT.class,
43+
CalciteIPComparisonIT.class,
44+
CalciteIPFunctionsIT.class,
45+
CalciteJsonFunctionsIT.class,
46+
CalciteLegacyAPICompatibilityIT.class,
47+
CalciteLikeQueryIT.class,
48+
CalciteMathematicalFunctionIT.class,
49+
CalciteMultisearchCommandIT.class,
50+
CalciteMultiValueStatsIT.class,
51+
CalciteNewAddedCommandsIT.class,
52+
CalciteNowLikeFunctionIT.class,
53+
CalciteObjectFieldOperateIT.class,
54+
CalciteOperatorIT.class,
55+
CalciteParseCommandIT.class,
56+
CalcitePPLAggregationIT.class,
57+
CalcitePPLAppendcolIT.class,
58+
CalcitePPLAppendCommandIT.class,
59+
CalcitePPLBasicIT.class,
60+
CalcitePPLBuiltinDatetimeFunctionInvalidIT.class,
61+
CalcitePPLBuiltinFunctionIT.class,
62+
CalcitePPLBuiltinFunctionsNullIT.class,
63+
CalcitePPLCaseFunctionIT.class,
64+
CalcitePPLCastFunctionIT.class,
65+
CalcitePPLConditionBuiltinFunctionIT.class,
66+
CalcitePPLCryptographicFunctionIT.class,
67+
CalcitePPLDedupIT.class,
68+
CalcitePPLEventstatsIT.class,
69+
CalcitePPLExistsSubqueryIT.class,
70+
CalcitePPLExplainIT.class,
71+
CalcitePPLFillnullIT.class,
72+
CalcitePPLGrokIT.class,
73+
CalcitePPLInSubqueryIT.class,
74+
CalcitePPLIPFunctionIT.class,
75+
CalcitePPLJoinIT.class,
76+
CalcitePPLJsonBuiltinFunctionIT.class,
77+
CalcitePPLLookupIT.class,
78+
CalcitePPLParseIT.class,
79+
CalcitePPLPatternsIT.class,
80+
CalcitePPLPluginIT.class,
81+
CalcitePPLRenameIT.class,
82+
CalcitePPLScalarSubqueryIT.class,
83+
CalcitePPLSortIT.class,
84+
CalcitePPLStringBuiltinFunctionIT.class,
85+
CalcitePPLTrendlineIT.class,
86+
CalcitePrometheusDataSourceCommandsIT.class,
87+
CalciteQueryAnalysisIT.class,
88+
CalciteRareCommandIT.class,
89+
CalciteRegexCommandIT.class,
90+
CalciteRexCommandIT.class,
91+
CalciteRenameCommandIT.class,
92+
CalciteReplaceCommandIT.class,
93+
CalciteResourceMonitorIT.class,
94+
CalciteSearchCommandIT.class,
95+
CalciteSettingsIT.class,
96+
CalciteShowDataSourcesCommandIT.class,
97+
CalciteSortCommandIT.class,
98+
CalciteStatsCommandIT.class,
99+
CalciteSystemFunctionIT.class,
100+
CalciteTextFunctionIT.class,
101+
CalciteTopCommandIT.class,
102+
CalciteTrendlineCommandIT.class,
103+
CalciteVisualizationFormatIT.class,
104+
CalciteWhereCommandIT.class,
105+
CalcitePPLTpchIT.class
105106
})
106107
public class CalciteNoPushdownIT {
107108
private static boolean wasPushdownEnabled;

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteChartCommandIT.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ public void init() throws Exception {
2929
loadIndex(Index.BANK_WITH_NULL_VALUES);
3030
loadIndex(Index.OTELLOGS);
3131
loadIndex(Index.TIME_TEST_DATA);
32+
loadIndex(Index.EVENTS_NULL);
3233
}
3334

3435
@Test
@@ -282,6 +283,25 @@ public void testChartUseNullTrueWithNullStr() throws IOException {
282283
rows("F", "nil", null));
283284
}
284285

286+
@Test
287+
public void testChartWithNullAndLimit() throws IOException {
288+
JSONObject result =
289+
executeQuery("source=events_null | chart limit=3 count() over @timestamp span=1d by host");
290+
291+
verifySchema(
292+
result,
293+
schema("@timestamp", "timestamp"),
294+
schema("host", "string"),
295+
schema("count()", "bigint"));
296+
297+
verifyDataRows(
298+
result,
299+
rows("2024-07-01 00:00:00", "db-01", 1),
300+
rows("2024-07-01 00:00:00", "web-01", 2),
301+
rows("2024-07-01 00:00:00", "web-02", 2),
302+
rows("2024-07-01 00:00:00", "NULL", 1));
303+
}
304+
285305
@Test
286306
public void testChartUseNullFalseWithNullStr() throws IOException {
287307
JSONObject result =
Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,33 @@
11
calcite:
22
logical: |
33
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4-
LogicalAggregate(group=[{1, 2}], avg(balance)=[AVG($0)])
5-
LogicalProject(avg(balance)=[$0], gender=[$1], age=[CASE(IS NULL($2), 'NULL', <=($5, 10), $2, 'OTHER')])
6-
LogicalJoin(condition=[=($2, $3)], joinType=[left])
7-
LogicalProject(avg(balance)=[$2], gender=[$0], age=[SAFE_CAST($1)])
4+
LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)])
5+
LogicalProject(gender=[$0], age=[CASE(IS NULL($1), 'NULL', <=($5, 10), $1, 'OTHER')], avg(balance)=[$2])
6+
LogicalJoin(condition=[=($1, $3)], joinType=[left])
7+
LogicalProject(gender=[$0], age=[SAFE_CAST($1)], avg(balance)=[$2])
88
LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)])
99
LogicalProject(gender=[$4], age=[$10], balance=[$7])
1010
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
1111
LogicalProject(age=[$0], __grand_total__=[$1], __row_number__=[ROW_NUMBER() OVER (ORDER BY $1 DESC NULLS LAST)])
12-
LogicalAggregate(group=[{1}], __grand_total__=[AVG($0)])
13-
LogicalProject(avg(balance)=[$2], age=[SAFE_CAST($1)])
14-
LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)])
15-
LogicalProject(gender=[$4], age=[$10], balance=[$7])
16-
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
12+
LogicalAggregate(group=[{0}], __grand_total__=[AVG($1)])
13+
LogicalFilter(condition=[IS NOT NULL($0)])
14+
LogicalProject(age=[SAFE_CAST($1)], avg(balance)=[$2])
15+
LogicalAggregate(group=[{0, 1}], avg(balance)=[AVG($2)])
16+
LogicalProject(gender=[$4], age=[$10], balance=[$7])
17+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]])
1718
physical: |
1819
EnumerableLimit(fetch=[10000])
1920
EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:DOUBLE], expr#7=[CASE($t5, $t6, $t2)], expr#8=[/($t7, $t3)], proj#0..1=[{exprs}], avg(balance)=[$t8])
20-
EnumerableAggregate(group=[{1, 2}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)])
21-
EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t2)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t2, $t9)], proj#0..1=[{exprs}], age=[$t10])
22-
EnumerableMergeJoin(condition=[=($2, $3)], joinType=[left])
23-
EnumerableSort(sort0=[$2], dir0=[ASC])
24-
EnumerableCalc(expr#0..2=[{inputs}], expr#3=[SAFE_CAST($t1)], avg(balance)=[$t2], gender=[$t0], age=[$t3])
21+
EnumerableAggregate(group=[{0, 1}], agg#0=[$SUM0($2)], agg#1=[COUNT($2)])
22+
EnumerableCalc(expr#0..4=[{inputs}], expr#5=[IS NULL($t1)], expr#6=['NULL'], expr#7=[10], expr#8=[<=($t4, $t7)], expr#9=['OTHER'], expr#10=[CASE($t5, $t6, $t8, $t1, $t9)], gender=[$t0], age=[$t10], avg(balance)=[$t2])
23+
EnumerableMergeJoin(condition=[=($1, $3)], joinType=[left])
24+
EnumerableSort(sort0=[$1], dir0=[ASC])
25+
EnumerableCalc(expr#0..2=[{inputs}], expr#3=[SAFE_CAST($t1)], gender=[$t0], age=[$t3], avg(balance)=[$t2])
2526
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2))], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
2627
EnumerableSort(sort0=[$0], dir0=[ASC])
2728
EnumerableCalc(expr#0..2=[{inputs}], age=[$t0], $1=[$t2])
2829
EnumerableWindow(window#0=[window(order by [1 DESC-nulls-last] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])
2930
EnumerableCalc(expr#0..2=[{inputs}], expr#3=[0], expr#4=[=($t2, $t3)], expr#5=[null:DOUBLE], expr#6=[CASE($t4, $t5, $t1)], expr#7=[/($t6, $t2)], age=[$t0], __grand_total__=[$t7])
30-
EnumerableAggregate(group=[{1}], agg#0=[$SUM0($0)], agg#1=[COUNT($0)])
31-
EnumerableCalc(expr#0..1=[{inputs}], expr#2=[SAFE_CAST($t1)], avg(balance)=[$t0], $f1=[$t2])
32-
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[avg(balance), age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])
31+
EnumerableAggregate(group=[{0}], agg#0=[$SUM0($1)], agg#1=[COUNT($1)])
32+
EnumerableCalc(expr#0..1=[{inputs}], expr#2=[SAFE_CAST($t0)], expr#3=[IS NOT NULL($t2)], $f0=[$t2], avg(balance)=[$t1], $condition=[$t3])
33+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=RelSubset#,group={0, 1},avg(balance)=AVG($2)), PROJECT->[age, avg(balance)]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":true,"missing_order":"first","order":"asc"}}},{"age":{"terms":{"field":"age","missing_bucket":true,"missing_order":"first","order":"asc"}}}]},"aggregations":{"avg(balance)":{"avg":{"field":"balance"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])

0 commit comments

Comments
 (0)