Skip to content

Commit 3b3c1cf

Browse files
authored
HIVE-29084: Wrong results for LATERAL VIEW queries due to incorrect WHERE filter removal (#6014)
1 parent f753d25 commit 3b3c1cf

File tree

4 files changed: 112 additions and 21 deletions.

ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/ASTConverter.java

Lines changed: 12 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -599,15 +599,7 @@ private QueryBlockInfo convertSource(RelNode r) throws CalciteSemanticException
599599
}
600600
} else if (isLateralView(r)) {
601601
TableFunctionScan tfs = ((TableFunctionScan) r);
602-
603-
// retrieve the base table source.
604-
QueryBlockInfo tableFunctionSource = convertSource(tfs.getInput(0));
605-
String sqAlias = tableFunctionSource.schema.get(0).table;
606-
// the schema will contain the base table source fields
607-
s = new Schema(tfs, sqAlias);
608-
609-
ast = createASTLateralView(tfs, s, tableFunctionSource, sqAlias);
610-
602+
return createASTLateralView(tfs, convertSource(tfs.getInput(0)), nextAlias());
611603
} else if (r instanceof TableSpool) {
612604
TableSpool spool = (TableSpool) r;
613605
ASTConverter cteConverter =
@@ -661,8 +653,8 @@ private ASTNode pkFkHint(int fkTableIndex, boolean nonFkSideIsFiltered) {
661653
}
662654
}
663655

664-
private static ASTNode createASTLateralView(TableFunctionScan tfs, Schema s,
665-
QueryBlockInfo tableFunctionSource, String sqAlias) {
656+
private static QueryBlockInfo createASTLateralView(TableFunctionScan tfs, QueryBlockInfo tableFunctionSource,
657+
String alias) {
666658
// The structure of the AST LATERAL VIEW will be:
667659
//
668660
// TOK_LATERAL_VIEW
@@ -683,7 +675,7 @@ private static ASTNode createASTLateralView(TableFunctionScan tfs, Schema s,
683675
RexCall lateralCall = (RexCall) tfs.getCall();
684676
RexCall call = (RexCall) lateralCall.getOperands().get(0);
685677
for (RexNode rn : call.getOperands()) {
686-
ASTNode expr = rn.accept(new RexVisitor(s, rn instanceof RexLiteral,
678+
ASTNode expr = rn.accept(new RexVisitor(tableFunctionSource.schema, rn instanceof RexLiteral,
687679
tfs.getCluster().getRexBuilder()));
688680
children.add(expr);
689681
}
@@ -695,16 +687,15 @@ private static ASTNode createASTLateralView(TableFunctionScan tfs, Schema s,
695687

696688
// Add only the table-generated side columns to the select expr for the function,
697689
// skipping over the base table columns from the input side of the join.
698-
int i = 0;
699-
for (ColumnInfo c : s) {
700-
if (i++ < tableFunctionSource.schema.size()) {
701-
continue;
702-
}
703-
selexpr.add(HiveParser.Identifier, c.column);
690+
List<RelDataTypeField> lvFields = tfs.getRowType().getFieldList()
691+
.subList(tableFunctionSource.schema.size(), tfs.getRowType().getFieldCount());
692+
for (RelDataTypeField field : lvFields) {
693+
selexpr.add(HiveParser.Identifier, field.getName());
704694
}
695+
705696
// add the table alias for the lateral view.
706697
ASTBuilder tabAlias = ASTBuilder.construct(HiveParser.TOK_TABALIAS, "TOK_TABALIAS");
707-
tabAlias.add(HiveParser.Identifier, sqAlias);
698+
tabAlias.add(HiveParser.Identifier, alias);
708699

709700
// add the table alias to the SEL_EXPR
710701
selexpr.add(tabAlias.node());
@@ -720,7 +711,8 @@ private static ASTNode createASTLateralView(TableFunctionScan tfs, Schema s,
720711
// finally, add the LATERAL VIEW clause under the left side source which is the base table.
721712
lateralview.add(tableFunctionSource.ast);
722713

723-
return lateralview.node();
714+
Schema outputSchema = new Schema(tableFunctionSource.schema, new Schema(alias, lvFields));
715+
return new QueryBlockInfo(outputSchema, lateralview.node());
724716
}
725717

726718
private boolean isLateralView(RelNode relNode) {
Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
-- SORT_QUERY_RESULTS
2+
3+
-- Verifies PPD doesn't eliminate an OR filter on the values of the same LV column
4+
SELECT t.key, lv.col
5+
FROM (SELECT stack(2, '0', '1') AS key) t
6+
LATERAL VIEW explode(array('2', '3')) lv AS col
7+
WHERE t.key = '0' OR lv.col = '3';
8+
9+
-- Verifies PPD doesn't eliminate inequality filter between base table and LV columns
10+
SELECT t.key, lv.col
11+
FROM (SELECT '1' AS key) t
12+
LATERAL VIEW explode(array('1', '2')) lv AS col
13+
WHERE t.key != lv.col;
14+
15+
-- Verifies PPD doesn't eliminate OR filter between different LV columns
16+
SELECT t.*, lv1.col1, lv2.col2
17+
FROM (SELECT 1) t
18+
LATERAL VIEW explode(array('a', 'b')) lv1 AS col1
19+
LATERAL VIEW explode(array('b', 'c')) lv2 AS col2
20+
WHERE lv1.col1 = 'a' OR lv2.col2 = 'c';
21+
22+
-- Verifies PPD doesn't eliminate complex filter with three LV columns
23+
SELECT t.*, lv1.col1, lv2.col2, lv3.col3
24+
FROM (SELECT 1) t
25+
LATERAL VIEW explode(array('x', 'y')) lv1 AS col1
26+
LATERAL VIEW explode(array('x', 'y')) lv2 AS col2
27+
LATERAL VIEW explode(array('x', 'y')) lv3 AS col3
28+
WHERE lv1.col1 != lv2.col2 AND lv2.col2 != lv3.col3;
Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,71 @@
1+
PREHOOK: query: SELECT t.key, lv.col
2+
FROM (SELECT stack(2, '0', '1') AS key) t
3+
LATERAL VIEW explode(array('2', '3')) lv AS col
4+
WHERE t.key = '0' OR lv.col = '3'
5+
PREHOOK: type: QUERY
6+
PREHOOK: Input: _dummy_database@_dummy_table
7+
#### A masked pattern was here ####
8+
POSTHOOK: query: SELECT t.key, lv.col
9+
FROM (SELECT stack(2, '0', '1') AS key) t
10+
LATERAL VIEW explode(array('2', '3')) lv AS col
11+
WHERE t.key = '0' OR lv.col = '3'
12+
POSTHOOK: type: QUERY
13+
POSTHOOK: Input: _dummy_database@_dummy_table
14+
#### A masked pattern was here ####
15+
0 2
16+
0 3
17+
1 3
18+
PREHOOK: query: SELECT t.key, lv.col
19+
FROM (SELECT '1' AS key) t
20+
LATERAL VIEW explode(array('1', '2')) lv AS col
21+
WHERE t.key != lv.col
22+
PREHOOK: type: QUERY
23+
PREHOOK: Input: _dummy_database@_dummy_table
24+
#### A masked pattern was here ####
25+
POSTHOOK: query: SELECT t.key, lv.col
26+
FROM (SELECT '1' AS key) t
27+
LATERAL VIEW explode(array('1', '2')) lv AS col
28+
WHERE t.key != lv.col
29+
POSTHOOK: type: QUERY
30+
POSTHOOK: Input: _dummy_database@_dummy_table
31+
#### A masked pattern was here ####
32+
1 2
33+
PREHOOK: query: SELECT t.*, lv1.col1, lv2.col2
34+
FROM (SELECT 1) t
35+
LATERAL VIEW explode(array('a', 'b')) lv1 AS col1
36+
LATERAL VIEW explode(array('b', 'c')) lv2 AS col2
37+
WHERE lv1.col1 = 'a' OR lv2.col2 = 'c'
38+
PREHOOK: type: QUERY
39+
PREHOOK: Input: _dummy_database@_dummy_table
40+
#### A masked pattern was here ####
41+
POSTHOOK: query: SELECT t.*, lv1.col1, lv2.col2
42+
FROM (SELECT 1) t
43+
LATERAL VIEW explode(array('a', 'b')) lv1 AS col1
44+
LATERAL VIEW explode(array('b', 'c')) lv2 AS col2
45+
WHERE lv1.col1 = 'a' OR lv2.col2 = 'c'
46+
POSTHOOK: type: QUERY
47+
POSTHOOK: Input: _dummy_database@_dummy_table
48+
#### A masked pattern was here ####
49+
1 a b
50+
1 a c
51+
1 b c
52+
PREHOOK: query: SELECT t.*, lv1.col1, lv2.col2, lv3.col3
53+
FROM (SELECT 1) t
54+
LATERAL VIEW explode(array('x', 'y')) lv1 AS col1
55+
LATERAL VIEW explode(array('x', 'y')) lv2 AS col2
56+
LATERAL VIEW explode(array('x', 'y')) lv3 AS col3
57+
WHERE lv1.col1 != lv2.col2 AND lv2.col2 != lv3.col3
58+
PREHOOK: type: QUERY
59+
PREHOOK: Input: _dummy_database@_dummy_table
60+
#### A masked pattern was here ####
61+
POSTHOOK: query: SELECT t.*, lv1.col1, lv2.col2, lv3.col3
62+
FROM (SELECT 1) t
63+
LATERAL VIEW explode(array('x', 'y')) lv1 AS col1
64+
LATERAL VIEW explode(array('x', 'y')) lv2 AS col2
65+
LATERAL VIEW explode(array('x', 'y')) lv3 AS col3
66+
WHERE lv1.col1 != lv2.col2 AND lv2.col2 != lv3.col3
67+
POSTHOOK: type: QUERY
68+
POSTHOOK: Input: _dummy_database@_dummy_table
69+
#### A masked pattern was here ####
70+
1 x y x
71+
1 y x y

ql/src/test/results/clientpositive/llap/lineage2.q.out

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -724,7 +724,7 @@ PREHOOK: query: select identity, ep1_id from relations
724724
PREHOOK: type: QUERY
725725
PREHOOK: Input: default@relations
726726
#### A masked pattern was here ####
727-
{"version":"1.0","engine":"tez","database":"default","hash":"436a649a0d9540e8f093f8353d86813a","queryText":"select identity, ep1_id from relations\n lateral view explode(ep1_ids) nav_rel as ep1_id","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"relations._col12","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"identity"},{"id":1,"vertexType":"COLUMN","vertexId":"ep1_id"},{"id":2,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":3,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"}]}
727+
{"version":"1.0","engine":"tez","database":"default","hash":"436a649a0d9540e8f093f8353d86813a","queryText":"select identity, ep1_id from relations\n lateral view explode(ep1_ids) nav_rel as ep1_id","edges":[{"sources":[2],"targets":[0],"edgeType":"PROJECTION"},{"sources":[3],"targets":[1],"expression":"nav_rel.ep1_id","edgeType":"PROJECTION"}],"vertices":[{"id":0,"vertexType":"COLUMN","vertexId":"identity"},{"id":1,"vertexType":"COLUMN","vertexId":"ep1_id"},{"id":2,"vertexType":"COLUMN","vertexId":"default.relations.identity"},{"id":3,"vertexType":"COLUMN","vertexId":"default.relations.ep1_ids"}]}
728728
PREHOOK: query: insert into rels_exploded select identity, type,
729729
ep1_src_type, ep1_type, ep2_src_type, ep2_type, ep1_id, ep2_id
730730
from relations lateral view explode(ep1_ids) rel1 as ep1_id

0 commit comments

Comments
 (0)