diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java index 705b04728c4c..a154d8cda52a 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java @@ -506,10 +506,20 @@ private RexNode rewriteInExists(RexSubQuery e, Set variablesSet, } switch (logic) { case TRUE: - builder.join(JoinRelType.SEMI, builder.and(conditions), variablesSet); + if (!variablesSet.isEmpty()) { + // This is correlated. Use SemiJoin. + builder.join(JoinRelType.SEMI, builder.and(conditions), variablesSet); + } else { + builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet); + } return builder.literal(true); case FALSE: - builder.join(JoinRelType.ANTI, builder.and(conditions), variablesSet); + if (!variablesSet.isEmpty()) { + // This is correlated. Use AntiJoin. + builder.join(JoinRelType.ANTI, builder.and(conditions), variablesSet); + } else { + builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet); + } return builder.literal(false); } builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet); diff --git a/ql/src/test/queries/clientpositive/subquery_in.q b/ql/src/test/queries/clientpositive/subquery_in.q index a5b3ce795173..80d4fe587547 100644 --- a/ql/src/test/queries/clientpositive/subquery_in.q +++ b/ql/src/test/queries/clientpositive/subquery_in.q @@ -305,4 +305,15 @@ select * from part where p_size IN (select max(p_size) from part p where p.p_typ explain select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name); select * from part where p_size IN (select pp.p_size from part p join part pp on pp.p_type = p.p_type where part.p_type <> p.p_name); +-- Two uncorrelated subqueries that join reordering rule can be applied on. +create table big (col1 string, col2 int, col3 double); +create table mid (col1 string, col2 int, col3 double); +create table small (col1 string, col2 int, col3 double); + +alter table big update statistics set('numRows'='123456', 'rawDataSize'='1234567'); +alter table mid update statistics set('numRows'='12345', 'rawDataSize'='123456'); +alter table small update statistics set('numRows'='1234', 'rawDataSize'='12345'); + +explain cbo +select col1 from small where col2 in (select col2 from mid) and col3 in (select col3 from big); diff --git a/ql/src/test/results/clientpositive/llap/groupby_groupingset_bug.q.out b/ql/src/test/results/clientpositive/llap/groupby_groupingset_bug.q.out index 6a1a5619f1b9..a8914d032d78 100644 --- a/ql/src/test/results/clientpositive/llap/groupby_groupingset_bug.q.out +++ b/ql/src/test/results/clientpositive/llap/groupby_groupingset_bug.q.out @@ -219,97 +219,109 @@ POSTHOOK: Input: default@x1_store_sales@ss_sold_date_sk=2 Plan optimized by CBO. Vertex dependency in root stage -Map 1 <- Map 4 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE) -Reducer 2 <- Map 1 (SIMPLE_EDGE) -Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +Map 1 <- Reducer 3 (BROADCAST_EDGE) +Map 4 <- Map 1 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE) +Map 7 <- Reducer 9 (BROADCAST_EDGE) +Reducer 3 <- Map 2 (SIMPLE_EDGE) Reducer 5 <- Map 4 (SIMPLE_EDGE) -Reducer 7 <- Map 6 (SIMPLE_EDGE) +Reducer 6 <- Reducer 5 (SIMPLE_EDGE) +Reducer 9 <- Map 8 (SIMPLE_EDGE) Stage-0 Fetch Operator limit:100 Stage-1 - Reducer 3 vectorized, llap - File Output Operator [FS_149] - Limit [LIM_148] (rows=2 width=8) + Reducer 6 vectorized, llap + File Output Operator [FS_145] + Limit [LIM_144] (rows=2 width=8) Number of rows:100 - Select Operator [SEL_147] (rows=2 width=8) + Select Operator [SEL_143] (rows=2 width=8) Output:["_col0"] - <-Reducer 2 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_146] - Select Operator [SEL_145] (rows=2 width=8) + <-Reducer 5 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_142] + Select Operator [SEL_141] (rows=2 width=8) Output:["_col0"] - Top N Key Operator [TNK_144] (rows=2 width=12) + Top N Key Operator [TNK_140] (rows=2 width=12) keys:_col1,top n:100 - Group By Operator [GBY_143] (rows=2 width=12) + Group By Operator [GBY_139] (rows=2 width=12) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] llap - SHUFFLE [RS_38] + <-Map 4 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_138] PartitionCols:_col0 - Group By Operator [GBY_37] (rows=10 width=12) - Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 - Select Operator [SEL_36] (rows=5185194 width=228) - Output:["_col0"] - Map Join Operator [MAPJOIN_107] (rows=5185194 width=228) - Conds:SEL_24._col1=RS_123._col1(Inner),Output:["_col0","_col2","_col3"],residual filter predicates:{(_col2 > _col3)} - <-Reducer 5 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_123] - PartitionCols:_col1 - Select Operator [SEL_122] (rows=1 width=197) + Group By Operator [GBY_137] (rows=2 width=12) + Output:["_col0","_col1"],aggregations:["count()"],keys:_col8 + Map Join Operator [MAPJOIN_136] (rows=5185194 width=4) + Conds:MAPJOIN_135._col6=RS_129._col0(Inner),Output:["_col8"] + <-Map 7 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_129] + PartitionCols:_col0 + Map Join Operator [MAPJOIN_128] (rows=28 width=8) + Conds:SEL_127._col1=RS_125._col0(Inner),Output:["_col0","_col1"] + <-Reducer 9 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_125] + PartitionCols:_col0 + Group By Operator [GBY_124] (rows=2 width=4) + Output:["_col0"],keys:KEY._col0 + <-Map 8 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_123] + PartitionCols:_col0 + Group By Operator [GBY_122] (rows=2 width=4) + Output:["_col0"],keys:d_month_seq + Select Operator [SEL_121] (rows=4 width=12) + Output:["d_month_seq"] + Filter Operator [FIL_120] (rows=4 width=12) + predicate:((d_year = 2000) and ((d_year * d_moy) > 200000) and (d_moy = 2) and d_month_seq is not null) + TableScan [TS_17] (rows=28 width=12) + default@x1_date_dim,x1_date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_month_seq","d_year","d_moy"] + <-Select Operator [SEL_127] (rows=28 width=8) Output:["_col0","_col1"] - Filter Operator [FIL_121] (rows=1 width=197) - predicate:_col1 is not null - Group By Operator [GBY_120] (rows=1 width=197) - Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 - <-Map 4 [SIMPLE_EDGE] vectorized, llap - SHUFFLE [RS_119] - PartitionCols:_col0 - Group By Operator [GBY_117] (rows=1 width=197) - Output:["_col0","_col1"],aggregations:["min(i_current_price)"],keys:i_category - Filter Operator [FIL_115] (rows=18 width=197) - predicate:i_category is not null - TableScan [TS_3] (rows=18 width=201) - default@x1_item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category","i_current_price"] - <-Select Operator [SEL_24] (rows=15555582 width=201) - Output:["_col0","_col1","_col2"] - Map Join Operator [MAPJOIN_106] (rows=15555582 width=201) - Conds:SEL_15._col3=RS_132._col0(Left Semi),Output:["_col3","_col5","_col6"] - <-Map 6 [BROADCAST_EDGE] vectorized, llap - SHUFFLE [RS_132] - PartitionCols:_col0 - Group By Operator [GBY_130] (rows=2 width=4) - Output:["_col0"],keys:_col0 - Select Operator [SEL_127] (rows=4 width=4) - Output:["_col0"] - Filter Operator [FIL_125] (rows=4 width=12) - predicate:((d_year = 2000) and ((d_year * d_moy) > 200000) and (d_moy = 2) and d_month_seq is not null) - TableScan [TS_6] (rows=28 width=8) - default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq","d_year","d_moy"] - <-Select Operator [SEL_15] (rows=15555582 width=201) - Output:["_col3","_col5","_col6"] - Map Join Operator [MAPJOIN_105] (rows=15555582 width=201) - Conds:MAPJOIN_104._col1=RS_135._col0(Inner),Output:["_col3","_col4","_col6"] - <-Reducer 7 [BROADCAST_EDGE] vectorized, llap - BROADCAST [RS_135] - PartitionCols:_col0 - Select Operator [SEL_134] - Output:["_col0","_col1"] - <-Map Join Operator [MAPJOIN_104] (rows=1111113 width=201) - Conds:SEL_2._col0=RS_118._col0(Inner),Output:["_col1","_col3","_col4"] - <-Map 4 [BROADCAST_EDGE] vectorized, llap - SHUFFLE [RS_118] - PartitionCols:_col0 - Select Operator [SEL_116] (rows=18 width=201) - Output:["_col0","_col1","_col2"] - Filter Operator [FIL_114] (rows=18 width=201) - predicate:(i_item_sk is not null and i_category is not null and i_current_price is not null) - Please refer to the previous TableScan [TS_3] - <-Select Operator [SEL_2] (rows=123457 width=8) - Output:["_col0","_col1"] - Filter Operator [FIL_59] (rows=123457 width=8) - predicate:ss_item_sk is not null - TableScan [TS_0] (rows=123457 width=8) - default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"] + Filter Operator [FIL_126] (rows=28 width=8) + predicate:(d_date_sk is not null and d_month_seq is not null) + TableScan [TS_14] (rows=28 width=8) + default@x1_date_dim,d,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_month_seq"] + Dynamic Partitioning Event Operator [EVENT_132] (rows=1 width=4) + Group By Operator [GBY_131] (rows=1 width=4) + Output:["_col0"],keys:_col0 + Select Operator [SEL_130] (rows=28 width=4) + Output:["_col0"] + Please refer to the previous Map Join Operator [MAPJOIN_128] + <-Map Join Operator [MAPJOIN_135] (rows=370371 width=4) + Conds:RS_31._col0=SEL_134._col0(Inner),Output:["_col6"] + <-Map 1 [BROADCAST_EDGE] llap + BROADCAST [RS_31] + PartitionCols:_col0 + Map Join Operator [MAPJOIN_103] (rows=6 width=228) + Conds:SEL_2._col1=RS_117._col1(Inner),Output:["_col0","_col2","_col3"],residual filter predicates:{(_col2 > _col3)} + <-Reducer 3 [BROADCAST_EDGE] vectorized, llap + BROADCAST [RS_117] + PartitionCols:_col1 + Select Operator [SEL_116] (rows=1 width=197) + Output:["_col0","_col1"] + Filter Operator [FIL_115] (rows=1 width=197) + predicate:_col1 is not null + Group By Operator [GBY_114] (rows=1 width=197) + Output:["_col0","_col1"],aggregations:["min(VALUE._col0)"],keys:KEY._col0 + <-Map 2 [SIMPLE_EDGE] vectorized, llap + SHUFFLE [RS_113] + PartitionCols:_col0 + Group By Operator [GBY_112] (rows=1 width=197) + Output:["_col0","_col1"],aggregations:["min(i_current_price)"],keys:i_category + Filter Operator [FIL_111] (rows=18 width=197) + predicate:i_category is not null + TableScan [TS_3] (rows=18 width=197) + default@x1_item,j,Tbl:COMPLETE,Col:COMPLETE,Output:["i_category","i_current_price"] + <-Select Operator [SEL_2] (rows=18 width=201) + Output:["_col0","_col1","_col2"] + Filter Operator [FIL_58] (rows=18 width=201) + predicate:(i_item_sk is not null and i_category is not null and i_current_price is not null) + TableScan [TS_0] (rows=18 width=201) + default@x1_item,i,Tbl:COMPLETE,Col:COMPLETE,Output:["i_item_sk","i_category","i_current_price"] + <-Select Operator [SEL_134] (rows=123457 width=8) + Output:["_col0","_col1"] + Filter Operator [FIL_133] (rows=123457 width=8) + predicate:ss_item_sk is not null + TableScan [TS_11] (rows=123457 width=8) + default@x1_store_sales,s,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_item_sk"] PREHOOK: query: select count(*) cnt from diff --git a/ql/src/test/results/clientpositive/llap/keep_uniform.q.out b/ql/src/test/results/clientpositive/llap/keep_uniform.q.out index 4354e788e6a2..9a34967c3a04 100644 --- a/ql/src/test/results/clientpositive/llap/keep_uniform.q.out +++ b/ql/src/test/results/clientpositive/llap/keep_uniform.q.out @@ -447,16 +447,18 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 10 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) - Reducer 11 <- Map 1 (SIMPLE_EDGE), Reducer 10 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE) - Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) - Reducer 4 <- Map 14 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) - Reducer 5 <- Reducer 4 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE) - Reducer 6 <- Reducer 11 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 11 <- Map 1 (SIMPLE_EDGE), Map 17 (SIMPLE_EDGE) + Reducer 12 <- Map 1 (SIMPLE_EDGE), Reducer 11 (SIMPLE_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 14 (SIMPLE_EDGE) + Reducer 3 <- Map 15 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 10 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) + Reducer 6 <- Map 18 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 7 <- Reducer 6 (SIMPLE_EDGE) Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE) - Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 15 (SIMPLE_EDGE) + Reducer 9 <- Map 1 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -484,18 +486,18 @@ STAGE PLANS: projectedOutputColumnNums: [2, 11, 13, 17, 28, 33] Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int) + key expressions: _col1 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: int) + Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumns: 2:int + keyColumns: 11:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumns: 11:int, 13:int, 17:int, 28:decimal(7,2), 33:decimal(7,2) + valueColumns: 2:int, 13:int, 17:int, 28:decimal(7,2), 33:decimal(7,2) Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -567,59 +569,7 @@ STAGE PLANS: dataColumns: ws_sold_date_sk:int, ws_sold_time_sk:int, ws_ship_date_sk:int, ws_item_sk:int, ws_bill_customer_sk:int, ws_bill_cdemo_sk:int, ws_bill_hdemo_sk:int, ws_bill_addr_sk:int, ws_ship_customer_sk:int, ws_ship_cdemo_sk:int, ws_ship_hdemo_sk:int, ws_ship_addr_sk:int, ws_web_page_sk:int, ws_web_site_sk:int, ws_ship_mode_sk:int, ws_warehouse_sk:int, ws_promo_sk:int, ws_order_number:int, ws_quantity:int, ws_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_list_price:decimal(7,2)/DECIMAL_64, ws_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, ws_ext_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_ext_list_price:decimal(7,2)/DECIMAL_64, ws_ext_tax:decimal(7,2)/DECIMAL_64, ws_coupon_amt:decimal(7,2)/DECIMAL_64, ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, ws_net_paid:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, ws_net_profit:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 12 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:d_date_sk:int, 1:d_date_id:string, 2:d_date:string, 3:d_month_seq:int, 4:d_week_seq:int, 5:d_quarter_seq:int, 6:d_year:int, 7:d_dow:int, 8:d_moy:int, 9:d_dom:int, 10:d_qoy:int, 11:d_fy_year:int, 12:d_fy_quarter_seq:int, 13:d_fy_week_seq:int, 14:d_day_name:string, 15:d_quarter_name:string, 16:d_holiday:string, 17:d_weekend:string, 18:d_following_holiday:string, 19:d_first_dom:int, 20:d_last_dom:int, 21:d_same_day_ly:int, 22:d_same_day_lq:int, 23:d_current_day:string, 24:d_current_week:string, 25:d_current_month:string, 26:d_current_quarter:string, 27:d_current_year:string, 28:ROW__ID:struct, 29:ROW__IS__DELETED:boolean] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 30:timestamp, left 1999-04-30 17:00:00.0, right 1999-06-29 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 30:timestamp), SelectColumnIsNotNull(col 0:int)) - predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: d_date_sk (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [DECIMAL_64] - featureSupportInUse: [DECIMAL_64] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 28 - includeColumns: [0, 2] - dataColumns: d_date_sk:int, d_date_id:string, d_date:string, d_month_seq:int, d_week_seq:int, d_quarter_seq:int, d_year:int, d_dow:int, d_moy:int, d_dom:int, d_qoy:int, d_fy_year:int, d_fy_quarter_seq:int, d_fy_week_seq:int, d_day_name:string, d_quarter_name:string, d_holiday:string, d_weekend:string, d_following_holiday:string, d_first_dom:int, d_last_dom:int, d_same_day_ly:int, d_same_day_lq:int, d_current_day:string, d_current_week:string, d_current_month:string, d_current_quarter:string, d_current_year:string - partitionColumnCount: 0 - scratchColumnTypeNames: [timestamp] - Map 13 + Map 14 Map Operator Tree: TableScan alias: customer_address @@ -671,7 +621,7 @@ STAGE PLANS: dataColumns: ca_address_sk:int, ca_address_id:string, ca_street_number:string, ca_street_name:string, ca_street_type:string, ca_suite_number:string, ca_city:string, ca_county:string, ca_state:string, ca_zip:string, ca_country:string, ca_gmt_offset:decimal(5,2)/DECIMAL_64, ca_location_type:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 14 + Map 15 Map Operator Tree: TableScan alias: web_site @@ -723,10 +673,10 @@ STAGE PLANS: dataColumns: web_site_sk:int, web_site_id:string, web_rec_start_date:string, web_rec_end_date:string, web_name:string, web_open_date_sk:int, web_close_date_sk:int, web_class:string, web_manager:string, web_mkt_id:int, web_mkt_class:string, web_mkt_desc:string, web_market_manager:string, web_company_id:int, web_company_name:string, web_street_number:string, web_street_name:string, web_street_type:string, web_suite_number:string, web_city:string, web_county:string, web_state:string, web_zip:string, web_country:string, web_gmt_offset:decimal(5,2)/DECIMAL_64, web_tax_percentage:decimal(5,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 15 + Map 16 Map Operator Tree: TableScan - alias: ws2 + alias: ws1 filterExpr: ws_order_number is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: @@ -777,7 +727,7 @@ STAGE PLANS: dataColumns: ws_sold_date_sk:int, ws_sold_time_sk:int, ws_ship_date_sk:int, ws_item_sk:int, ws_bill_customer_sk:int, ws_bill_cdemo_sk:int, ws_bill_hdemo_sk:int, ws_bill_addr_sk:int, ws_ship_customer_sk:int, ws_ship_cdemo_sk:int, ws_ship_hdemo_sk:int, ws_ship_addr_sk:int, ws_web_page_sk:int, ws_web_site_sk:int, ws_ship_mode_sk:int, ws_warehouse_sk:int, ws_promo_sk:int, ws_order_number:int, ws_quantity:int, ws_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_list_price:decimal(7,2)/DECIMAL_64, ws_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_discount_amt:decimal(7,2)/DECIMAL_64, ws_ext_sales_price:decimal(7,2)/DECIMAL_64, ws_ext_wholesale_cost:decimal(7,2)/DECIMAL_64, ws_ext_list_price:decimal(7,2)/DECIMAL_64, ws_ext_tax:decimal(7,2)/DECIMAL_64, ws_coupon_amt:decimal(7,2)/DECIMAL_64, ws_ext_ship_cost:decimal(7,2)/DECIMAL_64, ws_net_paid:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_tax:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship:decimal(7,2)/DECIMAL_64, ws_net_paid_inc_ship_tax:decimal(7,2)/DECIMAL_64, ws_net_profit:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 16 + Map 17 Map Operator Tree: TableScan alias: web_returns @@ -829,7 +779,98 @@ STAGE PLANS: dataColumns: wr_returned_date_sk:int, wr_returned_time_sk:int, wr_item_sk:int, wr_refunded_customer_sk:int, wr_refunded_cdemo_sk:int, wr_refunded_hdemo_sk:int, wr_refunded_addr_sk:int, wr_returning_customer_sk:int, wr_returning_cdemo_sk:int, wr_returning_hdemo_sk:int, wr_returning_addr_sk:int, wr_web_page_sk:int, wr_reason_sk:int, wr_order_number:int, wr_return_quantity:int, wr_return_amt:decimal(7,2)/DECIMAL_64, wr_return_tax:decimal(7,2)/DECIMAL_64, wr_return_amt_inc_tax:decimal(7,2)/DECIMAL_64, wr_fee:decimal(7,2)/DECIMAL_64, wr_return_ship_cost:decimal(7,2)/DECIMAL_64, wr_refunded_cash:decimal(7,2)/DECIMAL_64, wr_reversed_charge:decimal(7,2)/DECIMAL_64, wr_account_credit:decimal(7,2)/DECIMAL_64, wr_net_loss:decimal(7,2)/DECIMAL_64 partitionColumnCount: 0 scratchColumnTypeNames: [] + Map 18 + Map Operator Tree: + TableScan + alias: date_dim + filterExpr: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:d_date_sk:int, 1:d_date_id:string, 2:d_date:string, 3:d_month_seq:int, 4:d_week_seq:int, 5:d_quarter_seq:int, 6:d_year:int, 7:d_dow:int, 8:d_moy:int, 9:d_dom:int, 10:d_qoy:int, 11:d_fy_year:int, 12:d_fy_quarter_seq:int, 13:d_fy_week_seq:int, 14:d_day_name:string, 15:d_quarter_name:string, 16:d_holiday:string, 17:d_weekend:string, 18:d_following_holiday:string, 19:d_first_dom:int, 20:d_last_dom:int, 21:d_same_day_ly:int, 22:d_same_day_lq:int, 23:d_current_day:string, 24:d_current_week:string, 25:d_current_month:string, 26:d_current_quarter:string, 27:d_current_year:string, 28:ROW__ID:struct, 29:ROW__IS__DELETED:boolean] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterTimestampColumnBetween(col 30:timestamp, left 1999-04-30 17:00:00.0, right 1999-06-29 17:00:00.0)(children: CastStringToTimestamp(col 2:string) -> 30:timestamp), SelectColumnIsNotNull(col 0:int)) + predicate: (CAST( d_date AS TIMESTAMP) BETWEEN TIMESTAMP'1999-05-01 00:00:00' AND TIMESTAMP'1999-06-30 00:00:00' and d_date_sk is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: d_date_sk (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [DECIMAL_64] + featureSupportInUse: [DECIMAL_64] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 28 + includeColumns: [0, 2] + dataColumns: d_date_sk:int, d_date_id:string, d_date:string, d_month_seq:int, d_week_seq:int, d_quarter_seq:int, d_year:int, d_dow:int, d_moy:int, d_dom:int, d_qoy:int, d_fy_year:int, d_fy_quarter_seq:int, d_fy_week_seq:int, d_day_name:string, d_quarter_name:string, d_holiday:string, d_weekend:string, d_following_holiday:string, d_first_dom:int, d_last_dom:int, d_same_day_ly:int, d_same_day_lq:int, d_current_day:string, d_current_week:string, d_current_month:string, d_current_quarter:string, d_current_year:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp] Reducer 10 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reducer 11 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -850,7 +891,7 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false - Reducer 11 + Reducer 12 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -864,10 +905,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col2 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col2 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int) + keys: _col2 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 @@ -881,6 +922,45 @@ STAGE PLANS: MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 13 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez] IS true + reduceColumnNullOrder: z + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + keyExpressions: col 0:int + native: false + vectorProcessingMode: MERGE_PARTIAL + projectedOutputColumnNums: [] + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -888,17 +968,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col0 (type: int) + 0 _col1 (type: int) 1 _col0 (type: int) - outputColumnNames: _col1, _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col2 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: int) + Map-reduce partition columns: _col2 (type: int) Statistics: Num rows: 1 Data size: 264 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -909,17 +989,17 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col2 (type: int) 1 _col0 (type: int) - outputColumnNames: _col2, _col3, _col4, _col5 + outputColumnNames: _col0, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 290 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col3 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 1 Data size: 290 Basic stats: COMPLETE Column stats: NONE - value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -930,9 +1010,9 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: int) + 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5 + outputColumnNames: _col0, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 319 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: int) @@ -940,7 +1020,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col3 (type: int) Statistics: Num rows: 1 Data size: 319 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + value expressions: _col0 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -949,19 +1029,19 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col3 (type: int) 1 _col0 (type: int) - outputColumnNames: _col3, _col4, _col5 + outputColumnNames: _col0, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: int) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col3 (type: int) + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 1 Data size: 350 Basic stats: COMPLETE Column stats: NONE - value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + value expressions: _col3 (type: int), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) MergeJoin Vectorization: enabled: false enableConditionsNotMet: Vectorizing MergeJoin Supported IS false @@ -970,9 +1050,9 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col3 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) outputColumnNames: _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE @@ -1097,10 +1177,10 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col1 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col1 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Group By Operator - keys: _col0 (type: int) + keys: _col1 (type: int) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/llap/semijoin4.q.out b/ql/src/test/results/clientpositive/llap/semijoin4.q.out index f98a0a210f50..e09b70f29927 100644 --- a/ql/src/test/results/clientpositive/llap/semijoin4.q.out +++ b/ql/src/test/results/clientpositive/llap/semijoin4.q.out @@ -100,13 +100,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: bigint_col_13 (type: bigint), decimal1309_col_65 (type: decimal(13,9)) - outputColumnNames: _col0, _col2 + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: decimal(27,9)), _col0 (type: bigint) + key expressions: _col0 (type: bigint), _col1 (type: decimal(27,9)) null sort order: zz sort order: ++ - Map-reduce partition columns: _col2 (type: decimal(27,9)), _col0 (type: bigint) + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: decimal(27,9)) Statistics: Num rows: 1 Data size: 124 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -133,14 +133,14 @@ STAGE PLANS: predicate: ((tinyint_col_21 = -92Y) and UDFToLong(tinyint_col_18) is not null and decimal2709_col_9 is not null) (type: boolean) Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: decimal2709_col_9 (type: decimal(27,9)), tinyint_col_18 (type: tinyint) + expressions: decimal2709_col_9 (type: decimal(27,9)), UDFToLong(tinyint_col_18) (type: bigint) outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) + key expressions: _col1 (type: bigint), _col0 (type: decimal(27,9)) null sort order: zz sort order: ++ - Map-reduce partition columns: _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) + Map-reduce partition columns: _col1 (type: bigint), _col0 (type: decimal(27,9)) Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -185,7 +185,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 1 @@ -208,8 +208,8 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 _col2 (type: decimal(27,9)), _col0 (type: bigint) - 1 _col0 (type: decimal(27,9)), UDFToLong(_col1) (type: bigint) + 0 _col0 (type: bigint), _col1 (type: decimal(27,9)) + 1 _col1 (type: bigint), _col0 (type: decimal(27,9)) Statistics: Num rows: 1 Data size: 136 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator null sort order: diff --git a/ql/src/test/results/clientpositive/llap/sort_acid.q.out b/ql/src/test/results/clientpositive/llap/sort_acid.q.out index 30b5d8736ac3..956d2509270c 100644 --- a/ql/src/test/results/clientpositive/llap/sort_acid.q.out +++ b/ql/src/test/results/clientpositive/llap/sort_acid.q.out @@ -116,9 +116,9 @@ POSTHOOK: Input: default@acidtlb POSTHOOK: Output: default@acidtlb POSTHOOK: Output: default@acidtlb CBO PLAN: -HiveProject(row__id=[$2], a=[CAST(30):INTEGER], b=[350], e=[$1]) - HiveSemiJoin(condition=[true], joinType=[semi]) - HiveProject(a=[CAST(30):INTEGER], e=[$2], ROW__ID=[$5]) +HiveProject(row__id=[$1], a=[CAST(30):INTEGER], b=[350], e=[$0]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(e=[$2], ROW__ID=[$5]) HiveFilter(condition=[=($0, 30)]) HiveTableScan(table=[[default, acidtlb]], table:alias=[acidtlb]) HiveProject($f0=[$0]) @@ -170,7 +170,7 @@ POSTHOOK: Input: default@othertlb POSTHOOK: Output: default@acidtlb POSTHOOK: Output: default@acidtlb CBO PLAN: -HiveProject(row__id=[$2], a=[$0], b=[450], e=[$1]) +HiveProject(ROW__ID=[$2], a=[$0], $f2=[450], e=[$1]) HiveSemiJoin(condition=[=($0, $3)], joinType=[semi]) HiveProject(a=[$0], e=[$2], ROW__ID=[$5]) HiveFilter(condition=[<($0, 65)]) @@ -228,7 +228,7 @@ POSTHOOK: Input: default@othertlb POSTHOOK: Output: default@acidtlb CBO PLAN: HiveSortExchange(distribution=[any], collation=[[0]]) - HiveProject(row__id=[$1]) + HiveProject(ROW__ID=[$1]) HiveSemiJoin(condition=[=($0, $2)], joinType=[semi]) HiveProject(a=[$0], ROW__ID=[$5]) HiveFilter(condition=[IS NOT NULL($0)]) diff --git a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out index 915c3f1c448d..dcbfc1f810d8 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_ANY.q.out @@ -227,7 +227,7 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]) - HiveSemiJoin(condition=[=($0, $1)], joinType=[semi]) + HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(p_partkey=[$0]) HiveFilter(condition=[IS NOT NULL($0)]) HiveTableScan(table=[[default, part]], table:alias=[part]) @@ -375,7 +375,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part #### A masked pattern was here #### 26 -Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain cbo select count(*) from part where p_partkey >= ANY (select p_partkey from part) AND p_size = ANY (select p_size from part group by p_size) PREHOOK: type: QUERY @@ -388,19 +388,20 @@ POSTHOOK: Input: default@part #### A masked pattern was here #### CBO PLAN: HiveAggregate(group=[{}], agg#0=[count()]) - HiveSemiJoin(condition=[=($1, $5)], joinType=[semi]) - HiveJoin(condition=[OR(AND(>=($0, $2), <>($3, 0)), AND(>=($0, $2), <>($3, 0), IS NOT TRUE(>=($0, $2)), <=($3, $4)))], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(p_partkey=[$0], p_size=[$5]) - HiveFilter(condition=[IS NOT NULL($5)]) - HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveJoin(condition=[AND(=($1, $2), OR(AND(>=($0, $3), <>($4, 0)), AND(>=($0, $3), <>($4, 0), IS NOT TRUE(>=($0, $3)), <=($4, $5))))], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_partkey=[$0], p_size=[$5]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(p_size=[$0]) + HiveAggregate(group=[{5}]) + HiveFilter(condition=[IS NOT NULL($5)]) + HiveTableScan(table=[[default, part]], table:alias=[part]) HiveProject(m=[$0], c=[$1], d=[$2]) HiveAggregate(group=[{}], m=[MIN($0)], c=[COUNT()], d=[COUNT($0)]) HiveTableScan(table=[[default, part]], table:alias=[part]) - HiveProject(p_size=[$5]) - HiveFilter(condition=[IS NOT NULL($5)]) - HiveTableScan(table=[[default, part]], table:alias=[part]) -Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[43][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select count(*) from part where p_partkey >= ANY (select p_partkey from part) AND p_size = ANY (select p_size from part group by p_size) PREHOOK: type: QUERY diff --git a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out index 30a8ba43feae..6f52236e403d 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_exists.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_exists.q.out @@ -396,7 +396,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 1 diff --git a/ql/src/test/results/clientpositive/llap/subquery_in.q.out b/ql/src/test/results/clientpositive/llap/subquery_in.q.out index 598b05aa0600..9712fe825c36 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in.q.out @@ -390,24 +390,18 @@ STAGE PLANS: expressions: (UDFToDouble(_col0) / _col1) (type: double) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: double) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: double) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: double) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: double) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 UDFToDouble(_col1) (type: double) 1 _col0 (type: double) @@ -1185,17 +1179,13 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 14 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col3 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 14 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col1 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: int) - Statistics: Num rows: 14 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col3 (type: int) + value expressions: _col1 (type: int), _col2 (type: int) Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -1203,12 +1193,12 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col1 (type: int) + 0 _col0 (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col3 + outputColumnNames: _col1, _col2 Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col3 (type: int) + expressions: _col1 (type: int), _col2 (type: int) outputColumnNames: _col0, _col1 Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator @@ -1634,15 +1624,15 @@ STAGE PLANS: predicate: p_size is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), (p_size - 1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (_col5 - 1) (type: int) + key expressions: _col9 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (_col5 - 1) (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1685,7 +1675,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 (_col5 - 1) (type: int) + 0 _col9 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -1769,15 +1759,15 @@ STAGE PLANS: predicate: (p_partkey is not null and p_size is not null) (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), (p_partkey * p_size) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (_col0 * _col5) (type: int) + key expressions: _col9 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (_col0 * _col5) (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -1820,7 +1810,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 (_col0 * _col5) (type: int) + 0 _col9 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 24 Data size: 14856 Basic stats: COMPLETE Column stats: COMPLETE @@ -2016,15 +2006,15 @@ STAGE PLANS: predicate: p_retailprice is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), floor(p_retailprice) (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: floor(_col7) (type: bigint) + key expressions: _col9 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: floor(_col7) (type: bigint) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col9 (type: bigint) + Statistics: Num rows: 26 Data size: 16302 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2067,7 +2057,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 floor(_col7) (type: bigint) + 0 _col9 (type: bigint) 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 24 Data size: 14856 Basic stats: COMPLETE Column stats: COMPLETE @@ -2746,7 +2736,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -2779,18 +2769,12 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -2976,7 +2960,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -3009,18 +2993,12 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -3116,15 +3094,15 @@ STAGE PLANS: predicate: p_size is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), (p_size - 1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (_col5 - 1) (type: int) + key expressions: _col9 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (_col5 - 1) (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -3167,7 +3145,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 (_col5 - 1) (type: int) + 0 _col9 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -3265,15 +3243,15 @@ STAGE PLANS: predicate: p_size is not null (type: boolean) Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string), (p_size - 1) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: (_col5 - 1) (type: int) + key expressions: _col9 (type: int) null sort order: z sort order: + - Map-reduce partition columns: (_col5 - 1) (type: int) - Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col9 (type: int) + Statistics: Num rows: 26 Data size: 16198 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -3316,7 +3294,7 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 (_col5 - 1) (type: int) + 0 _col9 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE @@ -5351,7 +5329,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 UDFToLong(_col0) (type: bigint) 1 _col0 (type: bigint) @@ -5375,18 +5353,12 @@ STAGE PLANS: Filter Operator predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -6051,3 +6023,80 @@ POSTHOOK: Input: default@part 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl +PREHOOK: query: create table big (col1 string, col2 int, col3 double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@big +POSTHOOK: query: create table big (col1 string, col2 int, col3 double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@big +PREHOOK: query: create table mid (col1 string, col2 int, col3 double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@mid +POSTHOOK: query: create table mid (col1 string, col2 int, col3 double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mid +PREHOOK: query: create table small (col1 string, col2 int, col3 double) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@small +POSTHOOK: query: create table small (col1 string, col2 int, col3 double) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@small +PREHOOK: query: alter table big update statistics set('numRows'='123456', 'rawDataSize'='1234567') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@big +PREHOOK: Output: default@big +POSTHOOK: query: alter table big update statistics set('numRows'='123456', 'rawDataSize'='1234567') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@big +POSTHOOK: Output: default@big +PREHOOK: query: alter table mid update statistics set('numRows'='12345', 'rawDataSize'='123456') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@mid +PREHOOK: Output: default@mid +POSTHOOK: query: alter table mid update statistics set('numRows'='12345', 'rawDataSize'='123456') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@mid +POSTHOOK: Output: default@mid +PREHOOK: query: alter table small update statistics set('numRows'='1234', 'rawDataSize'='12345') +PREHOOK: type: ALTERTABLE_UPDATETABLESTATS +PREHOOK: Input: default@small +PREHOOK: Output: default@small +POSTHOOK: query: alter table small update statistics set('numRows'='1234', 'rawDataSize'='12345') +POSTHOOK: type: ALTERTABLE_UPDATETABLESTATS +POSTHOOK: Input: default@small +POSTHOOK: Output: default@small +PREHOOK: query: explain cbo +select col1 from small where col2 in (select col2 from mid) and col3 in (select col3 from big) +PREHOOK: type: QUERY +PREHOOK: Input: default@big +PREHOOK: Input: default@mid +PREHOOK: Input: default@small +#### A masked pattern was here #### +POSTHOOK: query: explain cbo +select col1 from small where col2 in (select col2 from mid) and col3 in (select col3 from big) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@big +POSTHOOK: Input: default@mid +POSTHOOK: Input: default@small +#### A masked pattern was here #### +CBO PLAN: +HiveProject(col1=[$0]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) + HiveProject(col1=[$0], col2=[$1]) + HiveSemiJoin(condition=[=($2, $3)], joinType=[semi]) + HiveProject(col1=[$0], col2=[$1], col3=[$2]) + HiveFilter(condition=[AND(IS NOT NULL($1), IS NOT NULL($2))]) + HiveTableScan(table=[[default, small]], table:alias=[small]) + HiveProject(col3=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, big]], table:alias=[big]) + HiveProject(col2=[$1]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveTableScan(table=[[default, mid]], table:alias=[mid]) + diff --git a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out index 4423b152f39c..77cf82612cca 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_in_having.q.out @@ -282,9 +282,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -310,6 +308,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: part_subq + filterExpr: p_mfgr is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) keys: p_mfgr (type: string) @@ -327,71 +336,48 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) + aggregations: max(VALUE._col0), min(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: ((_col1 - _col2) < 20) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 3 + Dummy Store Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) < 20) (type: boolean) + Select Operator + expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator @@ -434,9 +420,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE) - Reducer 4 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -462,6 +446,17 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: part_subq + filterExpr: p_mfgr is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: p_mfgr is not null (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: max(p_size), min(p_size) keys: p_mfgr (type: string) @@ -479,71 +474,48 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: sum(VALUE._col0), count(VALUE._col1) + aggregations: max(VALUE._col0), min(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double) - outputColumnNames: _col0, _col1 + Filter Operator + predicate: ((_col1 - _col2) < 20) (type: boolean) Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) + Select Operator + expressions: _col0 (type: string) + outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: double) - Reducer 3 + Dummy Store Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 4 - Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator - aggregations: max(VALUE._col0), min(VALUE._col1) + aggregations: sum(VALUE._col0), count(VALUE._col1) keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: ((_col1 - _col2) < 20) (type: boolean) + Select Operator + expressions: _col0 (type: string), (UDFToDouble(_col1) / _col2) (type: double) + outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 206 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/subquery_join_rewrite.q.out b/ql/src/test/results/clientpositive/llap/subquery_join_rewrite.q.out index 4529f13ea13c..fdc83e25449e 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_join_rewrite.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_join_rewrite.q.out @@ -209,14 +209,15 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@web_sales #### A masked pattern was here #### CBO PLAN: -HiveSemiJoin(condition=[true], joinType=[semi]) - HiveProject(ws_order_number=[$0], ws_warehouse_sk=[$1]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ws_order_number=[$0]) - HiveSortLimit(fetch=[1]) - HiveProject(ws_order_number=[CAST(2):INTEGER]) - HiveFilter(condition=[=($0, 2)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) +HiveProject(ws_order_number=[$0], ws_warehouse_sk=[$1]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$0], ws_warehouse_sk=[$1]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_order_number=[$0]) + HiveSortLimit(fetch=[1]) + HiveProject(ws_order_number=[CAST(2):INTEGER]) + HiveFilter(condition=[=($0, 2)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) Warning: Shuffle Join MERGEJOIN[16][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: select * from web_sales ws1 @@ -275,16 +276,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@web_sales #### A masked pattern was here #### CBO PLAN: -HiveSemiJoin(condition=[=($0, $2)], joinType=[semi]) - HiveProject(ws_order_number=[$0], ws_warehouse_sk=[$1]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ws_order_number=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveProject(ws_order_number=[$0]) - HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[1]) - HiveProject(ws_order_number=[$0]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) +HiveProject(ws_order_number=[$0], ws_warehouse_sk=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$0], ws_warehouse_sk=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveProject(ws_order_number=[$0]) + HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[1]) + HiveProject(ws_order_number=[$0]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) PREHOOK: query: select * from web_sales ws1 where ws1.ws_order_number in (select ws2.ws_order_number from web_sales ws2 order by ws2.ws_order_number nulls last limit 1) @@ -311,16 +313,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@web_sales #### A masked pattern was here #### CBO PLAN: -HiveSemiJoin(condition=[=($0, $2)], joinType=[semi]) - HiveProject(ws_order_number=[$0], ws_warehouse_sk=[$1]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ws_order_number=[$0]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveProject(ws_order_number=[$0]) - HiveSortLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[1]) - HiveProject(ws_order_number=[$0]) - HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) +HiveProject(ws_order_number=[$0], ws_warehouse_sk=[$1]) + HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_order_number=[$0], ws_warehouse_sk=[$1]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) + HiveProject(ws_order_number=[$0]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveProject(ws_order_number=[$0]) + HiveSortLimit(sort0=[$0], dir0=[ASC-nulls-first], fetch=[1]) + HiveProject(ws_order_number=[$0]) + HiveTableScan(table=[[default, web_sales]], table:alias=[ws2]) PREHOOK: query: select * from web_sales ws1 where ws1.ws_order_number in (select ws2.ws_order_number from web_sales ws2 order by ws2.ws_order_number nulls first limit 1) diff --git a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out index 61b7d685bf8f..062ce061a8de 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_multi.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_multi.q.out @@ -246,7 +246,7 @@ POSTHOOK: Input: default@part_null 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[57][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -264,36 +264,14 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 5 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_null - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col1 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col1 (type: string) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 Map Operator Tree: TableScan alias: part_null @@ -329,57 +307,49 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: boolean) + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -387,42 +357,29 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 - 1 + 0 _col1 (type: string) + 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: bigint), _col12 (type: bigint), _col10 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col9 = 0L) or (_col12 is null and (_col10 >= _col9) and _col3 is not null)) (type: boolean) - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col10 = 0L) or (_col13 is null and (_col11 >= _col10) and _col3 is not null)) (type: boolean) + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 7 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -430,6 +387,11 @@ STAGE PLANS: mode: mergepartial outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Select Operator expressions: _col0 (type: string), true (type: boolean) outputColumnNames: _col0, _col1 @@ -441,6 +403,24 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: boolean) Stage: Stage-0 Fetch Operator @@ -448,7 +428,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[57][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -457,7 +437,7 @@ POSTHOOK: query: select * from part_null where p_name IN (select p_name from par POSTHOOK: type: QUERY POSTHOOK: Input: default@part_null #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[57][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -475,41 +455,15 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_null - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 Map Operator Tree: TableScan alias: part_null @@ -546,54 +500,63 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: string) + key expressions: _col3 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col1 (type: string) + Map-reduce partition columns: _col3 (type: string) Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: boolean) + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -601,42 +564,29 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 - 1 + 0 _col1 (type: string) + 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: bigint), _col12 (type: bigint), _col10 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col9 = 0L) or (_col12 is null and (_col10 >= _col9) and _col3 is not null)) (type: boolean) - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col10 = 0L) or (_col13 is null and (_col11 >= _col10) and _col3 is not null)) (type: boolean) + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 7 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -655,6 +605,37 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: boolean) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator @@ -662,7 +643,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[57][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND p_brand NOT IN (select p_type from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -697,7 +678,7 @@ POSTHOOK: Input: default@part_null 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[57][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -715,41 +696,15 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE) - Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) - Reducer 6 <- Map 5 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 7 (XPROD_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 6 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE) + Reducer 6 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) + Reducer 7 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: part_null - filterExpr: p_brand is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_brand is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_brand (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map 5 Map Operator Tree: TableScan alias: part_null @@ -786,7 +741,19 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_brand is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_brand (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -801,39 +768,36 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 - Execution mode: llap + Execution mode: vectorized, llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count(VALUE._col0), count(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col3 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: - 0 _col3 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 - Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE + 0 + 1 + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: boolean) + key expressions: _col2 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1 Data size: 385 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -841,42 +805,29 @@ STAGE PLANS: condition map: Inner Join 0 to 1 keys: - 0 - 1 + 0 _col3 (type: string) + 1 _col2 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col12 - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col11 (type: bigint), _col12 (type: bigint), _col10 (type: boolean) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col9 = 0L) or (_col12 is null and (_col10 >= _col9))) (type: boolean) - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + predicate: ((_col10 = 0L) or (_col13 is null and (_col11 >= _col10))) (type: boolean) + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 1556 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 1355 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0), count(VALUE._col1) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: bigint), _col1 (type: bigint) - Reducer 7 + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -895,6 +846,37 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean) + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col3 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col3 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col3 (type: string) + Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: boolean) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: string) Stage: Stage-0 Fetch Operator @@ -902,7 +884,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[57][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3]] in Stage 'Reducer 4' is a cross product +Warning: Shuffle Join MERGEJOIN[63][tables = [$hdt$_2, $hdt$_3]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select * from part_null where p_brand IN (select p_brand from part_null) AND p_brand NOT IN (select p_name from part_null) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -1153,7 +1135,7 @@ POSTHOOK: Input: default@tempty 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -1173,8 +1155,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1183,10 +1166,22 @@ STAGE PLANS: TableScan alias: part_null filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -1200,33 +1195,6 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: part_null - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs Map 5 Map Operator Tree: TableScan @@ -1249,11 +1217,25 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -1264,12 +1246,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 1 @@ -1303,7 +1285,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND EXISTS (select c from tnull) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -1340,7 +1322,7 @@ POSTHOOK: Input: default@tnull 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -1360,8 +1342,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1370,10 +1353,22 @@ STAGE PLANS: TableScan alias: part_null filterExpr: p_size is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_size is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_size (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -1387,33 +1382,6 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: part_null - filterExpr: p_size is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_size (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs Map 5 Map Operator Tree: TableScan @@ -1436,11 +1404,25 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col5 (type: int) 1 _col0 (type: int) @@ -1451,12 +1433,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 1 @@ -1490,7 +1472,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[38][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[40][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part_null where p_size IN (select p_size from part_null) AND EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -1501,7 +1483,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@part_null POSTHOOK: Input: default@tempty #### A masked pattern was here #### -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -1521,8 +1503,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -1531,10 +1514,22 @@ STAGE PLANS: TableScan alias: part_null filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -1548,33 +1543,6 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: part_null - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs Map 5 Map Operator Tree: TableScan @@ -1597,11 +1565,25 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -1612,7 +1594,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -1621,10 +1603,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 Statistics: Num rows: 1 Data size: 1233 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: _col9 is null (type: boolean) + predicate: _col10 is null (type: boolean) Statistics: Num rows: 1 Data size: 1233 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -1657,7 +1639,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -2086,8 +2068,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE) Reducer 3 <- Map 4 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 5 <- Map 4 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2140,22 +2123,18 @@ STAGE PLANS: Filter Operator predicate: p_name is not null (type: boolean) Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: p_name (type: string) + Group By Operator + keys: p_name (type: string) + minReductionHashAggr: 0.4 + mode: hash outputColumnNames: _col0 - Statistics: Num rows: 26 Data size: 3146 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 @@ -2163,7 +2142,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -2194,6 +2173,20 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 3025 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator @@ -4575,7 +4568,7 @@ POSTHOOK: Input: default@part_null #### A masked pattern was here #### 192697 almond antique blue firebrick mint Manufacturer#5 Brand#52 MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part_null where p_size IN (select p_size from part_null) AND NOT EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -4595,8 +4588,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -4605,10 +4599,22 @@ STAGE PLANS: TableScan alias: part_null filterExpr: p_size is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_size is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_size (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -4622,33 +4628,6 @@ STAGE PLANS: value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: part_null - filterExpr: p_size is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_size is not null (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_size (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs Map 5 Map Operator Tree: TableScan @@ -4671,11 +4650,25 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col5 (type: int) 1 _col0 (type: int) @@ -4686,7 +4679,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -4695,10 +4688,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 Statistics: Num rows: 1 Data size: 1233 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: _col9 is null (type: boolean) + predicate: _col10 is null (type: boolean) Statistics: Num rows: 1 Data size: 1233 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -4731,7 +4724,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part_null where p_size IN (select p_size from part_null) AND NOT EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -4769,7 +4762,7 @@ POSTHOOK: Input: default@tempty 85768 almond antique chartreuse lavender yellow Manufacturer#1 Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull 86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12 STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully 90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31 MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: explain select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null @@ -4789,8 +4782,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE), Reducer 6 (CUSTOM_SIMPLE_EDGE) Reducer 6 <- Map 5 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -4799,10 +4793,22 @@ STAGE PLANS: TableScan alias: part_null filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 1120 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: p_name (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_partkey (type: int), p_name (type: string), p_mfgr (type: string), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -4816,33 +4822,6 @@ STAGE PLANS: value expressions: _col0 (type: int), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs - Map 4 - Map Operator Tree: - TableScan - alias: part_null - filterExpr: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Filter Operator - predicate: p_name is not null (type: boolean) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: p_name (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs Map 5 Map Operator Tree: TableScan @@ -4865,11 +4844,25 @@ STAGE PLANS: Execution mode: vectorized, llap LLAP IO: all inputs Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Reducer 3 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -4880,7 +4873,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 1232 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 + Reducer 4 Execution mode: llap Reduce Operator Tree: Merge Join Operator @@ -4889,10 +4882,10 @@ STAGE PLANS: keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10 Statistics: Num rows: 1 Data size: 1233 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: _col9 is null (type: boolean) + predicate: _col10 is null (type: boolean) Statistics: Num rows: 1 Data size: 1233 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -4925,7 +4918,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[39][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[41][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 4' is a cross product PREHOOK: query: select * from part_null where p_name IN (select p_name from part_null) AND NOT EXISTS (select c from tempty) PREHOOK: type: QUERY PREHOOK: Input: default@part_null diff --git a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out index a7ae3c111737..bbfc162becb4 100644 --- a/ql/src/test/results/clientpositive/llap/subquery_notin.q.out +++ b/ql/src/test/results/clientpositive/llap/subquery_notin.q.out @@ -3457,7 +3457,7 @@ almond aquamarine sandy cyan gainsboro almond aquamarine yellow dodger mint almond azure aquamarine papaya violet almond azure blanched chiffon midnight -Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: explain select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -3627,7 +3627,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -3660,18 +3660,12 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: @@ -3711,7 +3705,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join MERGEJOIN[61][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +Warning: Shuffle Join MERGEJOIN[59][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: select key, count(*) from src where value NOT IN (select key from src) group by key having count(*) in (select count(*) from src s1 where s1.key = '90' group by s1.key ) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -3928,7 +3922,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: bigint) 1 _col0 (type: bigint) @@ -3996,18 +3990,12 @@ STAGE PLANS: expressions: _col1 (type: bigint) outputColumnNames: _col0 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 3a64704b3839..c4c7b458be2d 100644 --- a/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -36,7 +36,7 @@ STAGE PLANS: TableScan alias: li filterExpr: ((l_linenumber = 1) and l_partkey is not null and l_orderkey is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_50_container, bigKeyColName:l_partkey, smallTablePos:1, keyRatio:0.14 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_49_container, bigKeyColName:l_partkey, smallTablePos:1, keyRatio:0.14 Statistics: Num rows: 100 Data size: 1600 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true @@ -68,49 +68,41 @@ STAGE PLANS: hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: - 1 Map 2 + 1 Reducer 3 Statistics: Num rows: 14 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: int), _col0 (type: int), _col2 (type: int) - outputColumnNames: _col0, _col1, _col3 - Select Vectorization: - className: VectorSelectOperator + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + className: VectorMapJoinLeftSemiLongOperator native: true - projectedOutputColumnNums: [1, 0, 2] - Statistics: Num rows: 14 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - className: VectorMapJoinLeftSemiLongOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 2 + Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: int), _col2 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - hashTableImplementationType: OPTIMIZED - outputColumnNames: _col0, _col3 - input vertices: - 1 Reducer 3 + projectedOutputColumnNums: [1, 2] Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: int), _col3 (type: int) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [1, 2] + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - File Sink Vectorization: - className: VectorFileSinkOperator - native: false - Statistics: Num rows: 14 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -242,7 +234,7 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 14 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index 8c5c55cda36e..acf72cecbeda 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -410,7 +410,7 @@ STAGE PLANS: TableScan alias: t1 filterExpr: (int_col_1 is not null and decimal0801_col is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_27_container, bigKeyColName:int_col_1, smallTablePos:1, keyRatio:1.0 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_25_container, bigKeyColName:int_col_1, smallTablePos:1, keyRatio:1.0 Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -432,7 +432,7 @@ STAGE PLANS: Statistics: Num rows: 5 Data size: 580 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col1 (type: int) 1 _col0 (type: int) @@ -440,7 +440,7 @@ STAGE PLANS: bigTableKeyColumns: 1:int bigTableRetainColumnNums: [0] bigTableValueColumns: 0:decimal(8,1) - className: VectorMapJoinLeftSemiLongOperator + className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nonOuterSmallTableKeyMapping: [] @@ -568,30 +568,17 @@ STAGE PLANS: predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: _col0 is not null (type: boolean) Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumns: 0:int - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query23.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query23.q.out index 14f050297158..8c46ce16568f 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query23.q.out @@ -1,3 +1,10 @@ +CTE Suggestion: +HiveFilter(condition=[IS NOT NULL($1)]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[CAST($2):BIGINT NOT NULL], $f1=[*(CAST($9):DECIMAL(10, 0), $12)]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + CTE Suggestion: HiveProject($f1=[$0]) HiveFilter(condition=[>($2, 4)]) @@ -15,68 +22,47 @@ HiveProject($f1=[$0]) HiveTableScan(table=[[default, item]], table:alias=[item]) CTE Suggestion: -HiveProject(c_customer_sk=[$0]) - HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(ss_customer_sk=[CAST($2):BIGINT NOT NULL], $f1=[*(CAST($9):DECIMAL(10, 0), $12)]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(EXPR$0=[*(0.95:DECIMAL(16, 6), $0)]) - HiveFilter(condition=[IS NOT NULL($0)]) - HiveAggregate(group=[{}], agg#0=[max($1)]) - HiveAggregate(group=[{1}], agg#0=[sum($2)]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_sold_date_sk=[$22], ss_customer_sk=[CAST($2):BIGINT NOT NULL], $f1=[*(CAST($9):DECIMAL(10, 0), $12)]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) +HiveProject(EXPR$0=[*(0.95:DECIMAL(16, 6), $0)]) + HiveFilter(condition=[IS NOT NULL($0)]) + HiveAggregate(group=[{}], agg#0=[max($1)]) + HiveAggregate(group=[{1}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_sold_date_sk=[$22], ss_customer_sk=[CAST($2):BIGINT NOT NULL], $f1=[*(CAST($9):DECIMAL(10, 0), $12)]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) CTE Suggestion: -HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER]) +HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[318][bigTable=?] in task 'Reducer 7' is a cross product +Warning: Map Join MAPJOIN[333][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[349][bigTable=?] in task 'Reducer 9' is a cross product CBO PLAN: HiveAggregate(group=[{}], agg#0=[sum($0)]) - HiveProject(sales=[$0]) + HiveProject($f0=[$0]) HiveUnion(all=[true]) - HiveProject(sales=[*(CAST($2):DECIMAL(10, 0), $3)]) - HiveSemiJoin(condition=[=($0, $8)], joinType=[semi]) - HiveSemiJoin(condition=[=($1, $8)], joinType=[semi]) - HiveJoin(condition=[=($4, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_bill_customer_sk=[$2], cs_item_sk=[$14], cs_quantity=[$17], cs_list_price=[$19], cs_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($33))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f1=[$0]) - HiveFilter(condition=[>($2, 4)]) - HiveProject(i_item_sk=[$3], d_date=[$1], $f2=[$2]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) - HiveAggregate(group=[{0, 3}], agg#0=[count()]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) - HiveFilter(condition=[IS NOT NULL($22)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(c_customer_sk=[$0]) - HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(ss_customer_sk=[CAST($2):BIGINT NOT NULL], $f1=[*(CAST($9):DECIMAL(10, 0), $12)]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)]) + HiveSemiJoin(condition=[=($0, $3)], joinType=[semi]) + HiveProject(cs_item_sk=[$3], cs_quantity=[$4], cs_list_price=[$5]) + HiveJoin(condition=[>($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[CAST($2):BIGINT NOT NULL], $f1=[*(CAST($9):DECIMAL(10, 0), $12)]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($4, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_bill_customer_sk=[$2], cs_item_sk=[$14], cs_quantity=[$17], cs_list_price=[$19], cs_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($33))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(EXPR$0=[*(0.95:DECIMAL(16, 6), $0)]) HiveFilter(condition=[IS NOT NULL($0)]) HiveProject($f0=[$0]) @@ -90,39 +76,39 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(sales=[*(CAST($2):DECIMAL(10, 0), $3)]) - HiveSemiJoin(condition=[=($1, $8)], joinType=[semi]) - HiveSemiJoin(condition=[=($0, $8)], joinType=[semi]) - HiveJoin(condition=[=($4, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_bill_customer_sk=[$3], ws_quantity=[$17], ws_list_price=[$19], ws_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($33))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject($f1=[$0]) - HiveFilter(condition=[>($2, 4)]) - HiveProject(i_item_sk=[$3], d_date=[$1], $f2=[$2]) - HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) - HiveAggregate(group=[{0, 3}], agg#0=[count()]) - HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) - HiveFilter(condition=[IS NOT NULL($22)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(i_item_sk=[$0]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(c_customer_sk=[$0]) - HiveJoin(condition=[>($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_customer_sk=[$0], $f1=[$1]) - HiveFilter(condition=[IS NOT NULL($1)]) - HiveAggregate(group=[{0}], agg#0=[sum($1)]) - HiveProject(ss_customer_sk=[CAST($2):BIGINT NOT NULL], $f1=[*(CAST($9):DECIMAL(10, 0), $12)]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject($f1=[$0]) + HiveFilter(condition=[>($2, 4)]) + HiveProject(i_item_sk=[$3], d_date=[$1], $f2=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{0, 3}], agg#0=[count()]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject($f0=[*(CAST($1):DECIMAL(10, 0), $2)]) + HiveSemiJoin(condition=[=($0, $3)], joinType=[semi]) + HiveProject(ws_item_sk=[$2], ws_quantity=[$4], ws_list_price=[$5]) + HiveJoin(condition=[>($1, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_customer_sk=[$0], $f1=[$1]) + HiveFilter(condition=[IS NOT NULL($1)]) + HiveAggregate(group=[{0}], agg#0=[sum($1)]) + HiveProject(ss_customer_sk=[CAST($2):BIGINT NOT NULL], $f1=[*(CAST($9):DECIMAL(10, 0), $12)]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($4, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_bill_customer_sk=[$3], ws_quantity=[$17], ws_list_price=[$19], ws_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($3), IS NOT NULL($33))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(EXPR$0=[*(0.95:DECIMAL(16, 6), $0)]) HiveFilter(condition=[IS NOT NULL($0)]) HiveProject($f0=[$0]) @@ -136,4 +122,19 @@ HiveAggregate(group=[{}], agg#0=[sum($0)]) HiveProject(d_date_sk=[$0]) HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject($f1=[$0]) + HiveFilter(condition=[>($2, 4)]) + HiveProject(i_item_sk=[$3], d_date=[$1], $f2=[$2]) + HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$0], d_date=[$1], $f2=[$2]) + HiveAggregate(group=[{0, 3}], agg#0=[count()]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IN($6, 1999, 2000, 2001, 2002)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(i_item_sk=[$0]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out index 9cfefefedb45..16a9cf2c319e 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query33.q.out @@ -1,23 +1,22 @@ CTE Suggestion: -HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) +HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + +CTE Suggestion: +HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) CTE Suggestion: -HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) +HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -CTE Suggestion: -HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[IS NOT NULL($13)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - -CTE Suggestion: -HiveProject(i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - CBO PLAN: HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(i_manufact_id=[$0], $f1=[$1]) @@ -25,66 +24,72 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(i_manufact_id=[$0], $f1=[$1]) HiveUnion(all=[true]) HiveProject(i_manufact_id=[$0], $f1=[$1]) - HiveAggregate(group=[{10}], agg#0=[sum($2)]) - HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[IS NOT NULL($13)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$1], i_manufact_id0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_manufact_id=[$0]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_manufact_id=[$0], $f1=[$1]) - HiveAggregate(group=[{10}], agg#0=[sum($2)]) - HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) - HiveJoin(condition=[=($1, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_bill_addr_sk=[$5], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($33))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[IS NOT NULL($13)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_bill_addr_sk=[$5], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($33))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$1], i_manufact_id0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_manufact_id=[$0]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_manufact_id=[$0], $f1=[$1]) - HiveAggregate(group=[{10}], agg#0=[sum($2)]) - HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(3):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) - HiveFilter(condition=[IS NOT NULL($13)]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_manufact_id=[$13]) - HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 3))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$1], i_manufact_id0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_manufact_id=[$13]) + HiveFilter(condition=[IS NOT NULL($13)]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_manufact_id=[$0]) + HiveAggregate(group=[{13}]) + HiveFilter(condition=[AND(=($12, _UTF-16LE'Books '), IS NOT NULL($13))]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out index 4f39e767f3c1..93747b9b0286 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query56.q.out @@ -1,18 +1,21 @@ CTE Suggestion: -HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8:DECIMAL(5, 2)):DECIMAL(5, 2)]) +HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + +CTE Suggestion: +HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) CTE Suggestion: -HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) +HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -CTE Suggestion: -HiveProject(i_item_id=[$1]) - HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - CBO PLAN: HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -20,63 +23,69 @@ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveUnion(all=[true]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{10}], agg#0=[sum($2)]) - HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_item_id=[$1]) - HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_id0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{10}], agg#0=[sum($2)]) - HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) - HiveJoin(condition=[=($1, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_bill_addr_sk=[$5], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($33))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_item_id=[$1]) - HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_bill_addr_sk=[$5], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($33))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_id0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{10}], agg#0=[sum($2)]) - HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(2000):INTEGER], d_moy=[CAST(1):INTEGER]) - HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-8:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_item_id=[$1]) - HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 2000), =($8, 1))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -8:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_id0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[IN($17, _UTF-16LE'chiffon', _UTF-16LE'lace', _UTF-16LE'orchid')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out index e979b30e0672..62fc6e473335 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query58.q.out @@ -1,111 +1,113 @@ CTE Suggestion: -HiveProject(d_date=[$0]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveFilter(condition=[sq_count_check($0)]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, 1998-02-19)]) +HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - -CTE Suggestion: -HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveFilter(condition=[sq_count_check($0)]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, 1998-02-19)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[396][bigTable=?] in task 'Map 11' is a cross product CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(item_id=[$4], ss_item_rev=[$7], ss_dev=[*(/(/($7, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], cs_item_rev=[$5], cs_dev=[*(/(/($5, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], ws_item_rev=[$1], ws_dev=[*(/(/($1, +(+($7, $5), $1)), 3:DECIMAL(10, 0)), 100:DECIMAL(10, 0))], average=[/(+(+($7, $5), $1), 3:DECIMAL(10, 0))]) HiveJoin(condition=[AND(=($4, $0), BETWEEN(false, $7, $2, $3), BETWEEN(false, $5, $2, $3), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $7), *(1.1:DECIMAL(2, 1), $7)), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $5), *(1.1:DECIMAL(2, 1), $5)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(item_id=[$0], ws_item_rev=[$1], EXPR$0=[*(0.9:DECIMAL(1, 1), $1)], EXPR$1=[*(1.1:DECIMAL(2, 1), $1)]) - HiveAggregate(group=[{6}], agg#0=[sum($1)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($4, $5)], joinType=[semi]) - HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveAggregate(group=[{7}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[sq_count_check($0)]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, 1998-02-19)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveFilter(condition=[sq_count_check($0)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, 1998-02-19)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveJoin(condition=[AND(=($2, $0), BETWEEN(false, $3, *(0.9:DECIMAL(1, 1), $1), *(1.1:DECIMAL(2, 1), $1)), BETWEEN(false, $1, *(0.9:DECIMAL(1, 1), $3), *(1.1:DECIMAL(2, 1), $3)))], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{6}], agg#0=[sum($1)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($4, $5)], joinType=[semi]) - HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) - HiveFilter(condition=[IS NOT NULL($33)]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveAggregate(group=[{7}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) + HiveFilter(condition=[IS NOT NULL($33)]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[sq_count_check($0)]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, 1998-02-19)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveFilter(condition=[sq_count_check($0)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, 1998-02-19)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{6}], agg#0=[sum($1)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($4, $5)], joinType=[semi]) - HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[IS NOT NULL($22)]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveAggregate(group=[{7}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) + HiveFilter(condition=[IS NOT NULL($22)]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(cnt=[$0]) - HiveFilter(condition=[sq_count_check($0)]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveJoin(condition=[=($1, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(cnt=[$0]) - HiveAggregate(group=[{}], cnt=[COUNT()]) - HiveFilter(condition=[=($2, 1998-02-19)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveFilter(condition=[sq_count_check($0)]) + HiveProject(cnt=[$0]) + HiveAggregate(group=[{}], cnt=[COUNT()]) + HiveFilter(condition=[=($2, 1998-02-19)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(=($2, 1998-02-19), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out index 64126092ba03..5fe958200c55 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query60.q.out @@ -1,18 +1,21 @@ CTE Suggestion: -HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) +HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + +CTE Suggestion: +HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) CTE Suggestion: -HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) +HiveProject(d_date_sk=[$0]) HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) -CTE Suggestion: -HiveProject(i_item_id=[$1]) - HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], $f1=[$1]) @@ -20,63 +23,69 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveProject(i_item_id=[$0], $f1=[$1]) HiveUnion(all=[true]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{10}], agg#0=[sum($2)]) - HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_item_sk=[$1], ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_item_id=[$1]) - HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_item_sk=[$1], ss_addr_sk=[$5], ss_ext_sales_price=[$14], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_id0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{10}], agg#0=[sum($2)]) - HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) - HiveJoin(condition=[=($1, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cs_bill_addr_sk=[$5], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($33))]) - HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_item_id=[$1]) - HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cs_bill_addr_sk=[$5], cs_item_sk=[$14], cs_ext_sales_price=[$22], cs_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($33))]) + HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_id0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{10}], agg#0=[sum($2)]) - HiveSemiJoin(condition=[=($10, $11)], joinType=[semi]) - HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($1, $7)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ws_item_sk=[$2], ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) - HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) - HiveProject(d_date_sk=[$0], d_year=[CAST(1999):INTEGER], d_moy=[CAST(9):INTEGER]) - HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(ca_address_sk=[$0], ca_gmt_offset=[CAST(-6:DECIMAL(5, 2)):DECIMAL(5, 2)]) - HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) - HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(i_item_sk=[$0], i_item_id=[$1]) - HiveTableScan(table=[[default, item]], table:alias=[item]) - HiveProject(i_item_id=[$1]) - HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) - HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveAggregate(group=[{7}], agg#0=[sum($2)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $4)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ws_item_sk=[$2], ws_bill_addr_sk=[$6], ws_ext_sales_price=[$22], ws_sold_date_sk=[$33]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($33))]) + HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[AND(=($6, 1999), =($8, 9))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(ca_address_sk=[$0]) + HiveFilter(condition=[=($11, -6:DECIMAL(1, 0))]) + HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) + HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_id0=[$2]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(i_item_sk=[$0], i_item_id=[$1]) + HiveTableScan(table=[[default, item]], table:alias=[item]) + HiveProject(i_item_id=[$0]) + HiveAggregate(group=[{1}]) + HiveFilter(condition=[=($12, _UTF-16LE'Children ')]) + HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query70.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query70.q.out index f3ba2a41c7f4..7b03475e55dd 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query70.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query70.q.out @@ -5,32 +5,33 @@ HiveProject(total_sum=[$0], s_state=[$1], s_county=[$2], lochierarchy=[$3], rank HiveProject($f0=[$0], $f1=[$1], $f2=[$2], GROUPING__ID=[$3]) HiveAggregate(group=[{0, 1}], groups=[[{0, 1}, {0}, {}]], agg#0=[sum($2)], GROUPING__ID=[GROUPING__ID()]) HiveProject($f0=[$7], $f1=[$6], $f2=[$1]) - HiveSemiJoin(condition=[=($7, $8)], joinType=[semi]) - HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_store_sk=[$6], ss_net_profit=[$21], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0], d_month_seq=[$3]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) - HiveProject(s_store_sk=[$0], s_county=[$23], s_state=[$24]) - HiveFilter(condition=[IS NOT NULL($24)]) - HiveTableScan(table=[[default, store]], table:alias=[store]) - HiveProject(s_state=[$0]) - HiveFilter(condition=[<=($1, 5)]) - HiveProject((tok_table_or_col s_state)=[$0], rank_window_0=[rank() OVER (PARTITION BY $0 ORDER BY $1 DESC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)]) - HiveProject(s_state=[$0], $f1=[$1]) - HiveAggregate(group=[{5}], agg#0=[sum($1)]) - HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(ss_store_sk=[$6], ss_net_profit=[$21], ss_sold_date_sk=[$22]) - HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($22))]) - HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) - HiveProject(d_date_sk=[$0]) - HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(s_store_sk=[$0], s_state=[$24]) - HiveFilter(condition=[IS NOT NULL($24)]) - HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$6], ss_net_profit=[$21], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0], d_month_seq=[$3]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[d1]) + HiveProject(s_store_sk=[$0], s_county=[$1], s_state=[$2], s_state0=[$3]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(s_store_sk=[$0], s_county=[$23], s_state=[$24]) + HiveFilter(condition=[IS NOT NULL($24)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) + HiveProject(s_state=[$0]) + HiveFilter(condition=[<=($1, 5)]) + HiveProject((tok_table_or_col s_state)=[$0], rank_window_0=[rank() OVER (PARTITION BY $0 ORDER BY $1 DESC NULLS FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)]) + HiveProject(s_state=[$0], $f1=[$1]) + HiveAggregate(group=[{5}], agg#0=[sum($1)]) + HiveJoin(condition=[=($4, $0)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($3, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(ss_store_sk=[$6], ss_net_profit=[$21], ss_sold_date_sk=[$22]) + HiveFilter(condition=[AND(IS NOT NULL($6), IS NOT NULL($22))]) + HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales]) + HiveProject(d_date_sk=[$0]) + HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(s_store_sk=[$0], s_state=[$24]) + HiveFilter(condition=[IS NOT NULL($24)]) + HiveTableScan(table=[[default, store]], table:alias=[store]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query83.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query83.q.out index d28432426ab6..4cbf6c1e1189 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query83.q.out @@ -1,17 +1,16 @@ CTE Suggestion: -HiveProject(d_date=[$0]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - -CTE Suggestion: -HiveProject(d_date_sk=[$0], d_date=[$2]) - HiveFilter(condition=[IS NOT NULL($2)]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) +HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(d_date_sk=[$0], d_date=[$2]) + HiveFilter(condition=[IS NOT NULL($2)]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) CBO PLAN: HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) @@ -19,66 +18,69 @@ HiveSortLimit(sort0=[$0], sort1=[$1], dir0=[ASC], dir1=[ASC], fetch=[100]) HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{6}], agg#0=[sum($1)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($4, $5)], joinType=[semi]) - HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(cr_item_sk=[$1], cr_return_quantity=[$16], cr_returned_date_sk=[$26]) - HiveFilter(condition=[IS NOT NULL($26)]) - HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveAggregate(group=[{7}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(cr_item_sk=[$1], cr_return_quantity=[$16], cr_returned_date_sk=[$26]) + HiveFilter(condition=[IS NOT NULL($26)]) + HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(i_item_id=[$0], $f1=[$1]) - HiveAggregate(group=[{6}], agg#0=[sum($1)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($4, $5)], joinType=[semi]) - HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(sr_item_sk=[$1], sr_return_quantity=[$9], sr_returned_date_sk=[$19]) - HiveFilter(condition=[IS NOT NULL($19)]) - HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveAggregate(group=[{7}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(sr_item_sk=[$1], sr_return_quantity=[$9], sr_returned_date_sk=[$19]) + HiveFilter(condition=[IS NOT NULL($19)]) + HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) HiveProject(item_id=[$0], wr_item_qty=[$1], EXPR$0=[CAST($1):DOUBLE]) - HiveAggregate(group=[{6}], agg#0=[sum($1)]) - HiveJoin(condition=[=($0, $5)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveSemiJoin(condition=[=($4, $5)], joinType=[semi]) - HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveProject(wr_item_sk=[$1], wr_return_quantity=[$13], wr_returned_date_sk=[$23]) - HiveFilter(condition=[IS NOT NULL($23)]) - HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveAggregate(group=[{7}], agg#0=[sum($1)]) + HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveProject(wr_item_sk=[$1], wr_return_quantity=[$13], wr_returned_date_sk=[$23]) + HiveFilter(condition=[IS NOT NULL($23)]) + HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns]) + HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(d_date_sk=[$0], d_date=[$2]) HiveFilter(condition=[IS NOT NULL($2)]) HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_date=[$0]) - HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) - HiveProject(d_date=[$2], d_week_seq=[$4]) - HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) - HiveProject(d_week_seq=[$4]) - HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) - HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_date=[$0]) + HiveAggregate(group=[{0}]) + HiveSemiJoin(condition=[=($1, $2)], joinType=[semi]) + HiveProject(d_date=[$2], d_week_seq=[$4]) + HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) + HiveProject(d_week_seq=[$4]) + HiveFilter(condition=[AND(IN($2, 1998-01-02:DATE, 1998-10-15:DATE, 1998-11-10:DATE), IS NOT NULL($4))]) + HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim]) HiveProject(i_item_sk=[$0], i_item_id=[$1]) HiveTableScan(table=[[default, item]], table:alias=[item]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out index 279de7cfc1d2..935028ef22fa 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/cbo_query95.q.out @@ -1,18 +1,18 @@ CBO PLAN: -HiveAggregate(group=[{}], agg#0=[count(DISTINCT $3)], agg#1=[sum($4)], agg#2=[sum($5)]) - HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) - HiveSemiJoin(condition=[=($3, $12)], joinType=[semi]) - HiveProject(ws_ship_date_sk=[$0], ws_ship_addr_sk=[$1], ws_web_site_sk=[$2], ws_order_number=[$3], ws_ext_ship_cost=[$4], ws_net_profit=[$5], d_date_sk=[$10], d_date=[$11], ca_address_sk=[$6], ca_state=[$7], web_site_sk=[$8], web_company_name=[$9]) - HiveJoin(condition=[=($0, $10)], joinType=[inner], algorithm=[none], cost=[not available]) - HiveJoin(condition=[=($2, $8)], joinType=[inner], algorithm=[none], cost=[not available]) +HiveAggregate(group=[{}], agg#0=[count(DISTINCT $0)], agg#1=[sum($1)], agg#2=[sum($2)]) + HiveSemiJoin(condition=[=($0, $3)], joinType=[semi]) + HiveSemiJoin(condition=[=($0, $3)], joinType=[semi]) + HiveProject(ws_order_number=[$3], ws_ext_ship_cost=[$4], ws_net_profit=[$5]) + HiveJoin(condition=[=($0, $8)], joinType=[inner], algorithm=[none], cost=[not available]) + HiveJoin(condition=[=($2, $7)], joinType=[inner], algorithm=[none], cost=[not available]) HiveJoin(condition=[=($1, $6)], joinType=[inner], algorithm=[none], cost=[not available]) HiveProject(ws_ship_date_sk=[$1], ws_ship_addr_sk=[$10], ws_web_site_sk=[$12], ws_order_number=[$16], ws_ext_ship_cost=[$27], ws_net_profit=[$32]) HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($12), IS NOT NULL($1))]) HiveTableScan(table=[[default, web_sales]], table:alias=[ws1]) - HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):CHAR(2) CHARACTER SET "UTF-16LE"]) + HiveProject(ca_address_sk=[$0]) HiveFilter(condition=[=($8, _UTF-16LE'TX')]) HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address]) - HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri '):CHAR(50) CHARACTER SET "UTF-16LE"]) + HiveProject(web_site_sk=[$0]) HiveFilter(condition=[=($14, _UTF-16LE'pri ')]) HiveTableScan(table=[[default, web_site]], table:alias=[web_site]) HiveProject(d_date_sk=[$0], d_date=[$2]) diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out index 1e33338c615a..9459caaac1d5 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query23.q.out @@ -1,4 +1,5 @@ -Warning: Map Join MAPJOIN[318][bigTable=?] in task 'Reducer 7' is a cross product +Warning: Map Join MAPJOIN[333][bigTable=?] in task 'Reducer 5' is a cross product +Warning: Map Join MAPJOIN[349][bigTable=?] in task 'Reducer 9' is a cross product STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8,15 +9,21 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 12 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 4 <- Map 12 (BROADCAST_EDGE), Reducer 11 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Union 2 (CONTAINS) - Map 8 <- Map 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE) - Reducer 10 <- Reducer 9 (CUSTOM_SIMPLE_EDGE) - Reducer 11 <- Map 5 (BROADCAST_EDGE), Map 8 (SIMPLE_EDGE) - Reducer 13 <- Map 12 (SIMPLE_EDGE) - Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (BROADCAST_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) + Map 1 <- Map 14 (BROADCAST_EDGE) + Map 10 <- Map 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE) + Map 16 <- Map 14 (BROADCAST_EDGE) + Map 3 <- Reducer 17 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 11 <- Map 10 (SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (CUSTOM_SIMPLE_EDGE) + Reducer 13 <- Map 10 (SIMPLE_EDGE), Map 18 (BROADCAST_EDGE) + Reducer 15 <- Map 14 (SIMPLE_EDGE) + Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (CUSTOM_SIMPLE_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS) + Reducer 7 <- Union 6 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Map 3 (SIMPLE_EDGE) + Reducer 9 <- Map 16 (CUSTOM_SIMPLE_EDGE), Reducer 12 (BROADCAST_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 8 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -24,7 +31,6 @@ STAGE PLANS: TableScan alias: catalog_sales filterExpr: cs_bill_customer_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_314_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:3.0779924292960215E-5 Statistics: Num rows: 43005109025 Data size: 6007427450388 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cs_bill_customer_sk is not null (type: boolean) @@ -41,46 +47,99 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 12 + 1 Map 14 Statistics: Num rows: 723144625 Data size: 82199941740 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col2, _col3 - input vertices: - 1 Reducer 11 - Statistics: Num rows: 723144625 Data size: 76414784740 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 7 - Statistics: Num rows: 723144625 Data size: 71473275804 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: (CAST( _col2 AS decimal(10,0)) * _col3) (type: decimal(18,2)) - outputColumnNames: _col0 - Statistics: Num rows: 723144625 Data size: 80992198000 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: decimal(28,2)) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 723144625 Data size: 82199941740 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: decimal(7,2)) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 723144625 Data size: 4941508936 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1323694) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 10 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 82510879939 Data size: 10650501896012 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ss_customer_sk is not null (type: boolean) + Statistics: Num rows: 80566020964 Data size: 10399459558156 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_sold_date_sk (type: bigint), ss_customer_sk (type: bigint), (CAST( ss_quantity AS decimal(10,0)) * ss_sales_price) (type: decimal(18,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 80566020964 Data size: 10297258548832 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 14 + Statistics: Num rows: 64769599664 Data size: 7757159825120 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col1 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 64769599664 Data size: 7757159825120 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 64769599664 Data size: 7757159825120 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(28,2)) + Select Operator + expressions: ss_item_sk (type: bigint), ss_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 82510879939 Data size: 1320174079024 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col3 + input vertices: + 1 Reducer 15 + Statistics: Num rows: 66333133964 Data size: 4245320573696 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: count() + keys: _col0 (type: bigint), _col3 (type: date) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 33166566982 Data size: 2387992822704 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint), _col1 (type: date) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) + Statistics: Num rows: 33166566982 Data size: 2387992822704 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 12 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -114,7 +173,7 @@ STAGE PLANS: Target Input: store_sales Partition key expr: ss_sold_date_sk Statistics: Num rows: 1468 Data size: 11744 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 8 + Target Vertex: Map 10 Select Operator expressions: d_date_sk (type: bigint), d_date (type: date) outputColumnNames: _col0, _col1 @@ -141,7 +200,7 @@ STAGE PLANS: Target Input: store_sales Partition key expr: ss_sold_date_sk Statistics: Num rows: 1468 Data size: 11744 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 8 + Target Vertex: Map 10 Filter Operator predicate: ((d_year = 1999) and (d_moy = 1)) (type: boolean) Statistics: Num rows: 31 Data size: 496 Basic stats: COMPLETE Column stats: COMPLETE @@ -166,11 +225,11 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator - Target column: cs_sold_date_sk (bigint) - Target Input: catalog_sales - Partition key expr: cs_sold_date_sk + Target column: ws_sold_date_sk (bigint) + Target Input: web_sales + Partition key expr: ws_sold_date_sk Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 + Target Vertex: Map 16 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -188,19 +247,18 @@ STAGE PLANS: outputColumnNames: _col0 Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE Dynamic Partitioning Event Operator - Target column: ws_sold_date_sk (bigint) - Target Input: web_sales - Partition key expr: ws_sold_date_sk + Target column: cs_sold_date_sk (bigint) + Target Input: catalog_sales + Partition key expr: cs_sold_date_sk Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 4 + Target Vertex: Map 1 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 4 + Map 16 Map Operator Tree: TableScan alias: web_sales filterExpr: ws_bill_customer_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_315_container, bigKeyColName:ws_item_sk, smallTablePos:1, keyRatio:0.01697464775419283 Statistics: Num rows: 21594638446 Data size: 3022914194636 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ws_bill_customer_sk is not null (type: boolean) @@ -217,46 +275,33 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3 input vertices: - 1 Map 12 + 1 Map 14 Statistics: Num rows: 366561381 Data size: 48050956264 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2, _col3 - input vertices: - 1 Reducer 11 - Statistics: Num rows: 366561381 Data size: 45118465216 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col2, _col3 - input vertices: - 1 Reducer 7 - Statistics: Num rows: 366561381 Data size: 42207520544 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: (CAST( _col2 AS decimal(10,0)) * _col3) (type: decimal(18,2)) - outputColumnNames: _col0 - Statistics: Num rows: 366561381 Data size: 41054874672 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col0) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: decimal(28,2)) + Reduce Output Operator + key expressions: _col1 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: bigint) + Statistics: Num rows: 366561381 Data size: 48050956264 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col2 (type: int), _col3 (type: decimal(7,2)) + Select Operator + expressions: _col1 (type: bigint) + outputColumnNames: _col1 + Statistics: Num rows: 366561381 Data size: 2910944672 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col1), max(_col1), bloom_filter(_col1, expectedEntries=1333117) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 18 Map Operator Tree: TableScan alias: item @@ -273,14 +318,14 @@ STAGE PLANS: Statistics: Num rows: 462000 Data size: 3696000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 6 + Map 3 Map Operator Tree: TableScan alias: store_sales - filterExpr: ss_customer_sk is not null (type: boolean) + filterExpr: (ss_customer_sk is not null and ((ss_customer_sk BETWEEN DynamicValue(RS_39_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_39_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_39_catalog_sales_cs_bill_customer_sk_bloom_filter))) or (ss_customer_sk BETWEEN DynamicValue(RS_114_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_114_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_114_web_sales_ws_bill_customer_sk_bloom_filter))))) (type: boolean) Statistics: Num rows: 86404891377 Data size: 10231957442552 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ss_customer_sk is not null (type: boolean) + predicate: (ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_39_catalog_sales_cs_bill_customer_sk_min) AND DynamicValue(RS_39_catalog_sales_cs_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_39_catalog_sales_cs_bill_customer_sk_bloom_filter))) (type: boolean) Statistics: Num rows: 82514936083 Data size: 9771313879636 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ss_customer_sk (type: bigint), (CAST( ss_quantity AS decimal(10,0)) * ss_sales_price) (type: decimal(18,2)) @@ -300,75 +345,54 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 82514936083 Data size: 9872073696464 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(28,2)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 8 - Map Operator Tree: - TableScan - alias: store_sales - Statistics: Num rows: 82510879939 Data size: 10650501896012 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: ss_customer_sk is not null (type: boolean) - Statistics: Num rows: 80566020964 Data size: 10399459558156 Basic stats: COMPLETE Column stats: COMPLETE + predicate: (ss_customer_sk is not null and ss_customer_sk BETWEEN DynamicValue(RS_114_web_sales_ws_bill_customer_sk_min) AND DynamicValue(RS_114_web_sales_ws_bill_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_114_web_sales_ws_bill_customer_sk_bloom_filter))) (type: boolean) + Statistics: Num rows: 82514936083 Data size: 9771313879636 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ss_sold_date_sk (type: bigint), ss_customer_sk (type: bigint), (CAST( ss_quantity AS decimal(10,0)) * ss_sales_price) (type: decimal(18,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 80566020964 Data size: 10297258548832 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2 - input vertices: - 1 Reducer 13 - Statistics: Num rows: 64769599664 Data size: 7757159825120 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col1 (type: bigint) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 64769599664 Data size: 7757159825120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 64769599664 Data size: 7757159825120 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(28,2)) - Select Operator - expressions: ss_item_sk (type: bigint), ss_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 82510879939 Data size: 1320174079024 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col3 - input vertices: - 1 Map 12 - Statistics: Num rows: 66333133964 Data size: 4245320573696 Basic stats: COMPLETE Column stats: COMPLETE + expressions: ss_customer_sk (type: bigint), (CAST( ss_quantity AS decimal(10,0)) * ss_sales_price) (type: decimal(18,2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 82514936083 Data size: 9771313879636 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: count() - keys: _col0 (type: bigint), _col3 (type: date) + aggregations: sum(_col1) + keys: _col0 (type: bigint) minReductionHashAggr: 0.99 mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 33166566982 Data size: 2387992822704 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 82514936083 Data size: 9872073696464 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: bigint), _col1 (type: date) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: bigint), _col1 (type: date) - Statistics: Num rows: 33166566982 Data size: 2387992822704 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: bigint) + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 82514936083 Data size: 9872073696464 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(28,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Reducer 10 + Reducer 11 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 63129535 Data size: 7560736760 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: decimal(28,2)) + outputColumnNames: _col1 + Statistics: Num rows: 63129535 Data size: 7560736760 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: max(_col1) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(28,2)) + Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -388,7 +412,12 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(37,8)) - Reducer 11 + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(37,8)) + Reducer 13 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -409,7 +438,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col2, _col3 input vertices: - 1 Map 5 + 1 Map 18 Statistics: Num rows: 16583283491 Data size: 265332535856 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: bigint), _col2 (type: bigint) @@ -440,19 +469,121 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 64255141 Data size: 514041128 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 13 + Reducer 15 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col0 + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) + outputColumnNames: _col0, _col1 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 1468 Data size: 11744 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 3 + Statistics: Num rows: 1468 Data size: 93952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: date) + Reducer 17 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1333117) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, 1, expectedEntries=1323694) + mode: final + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: bigint) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 78525966 Data size: 9394833968 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: _col1 is not null (type: boolean) + Statistics: Num rows: 78525966 Data size: 9394833968 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 78525966 Data size: 9394833968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(28,2)) + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col1, _col2, _col3, _col7 + input vertices: + 0 Map 1 + Statistics: Num rows: 723144634 Data size: 158250632928 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col1, _col2, _col3, _col7, _col8 + input vertices: + 1 Reducer 12 + Statistics: Num rows: 723144634 Data size: 239242831936 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col7 > _col8) (type: boolean) + Statistics: Num rows: 241048211 Data size: 79747610608 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: bigint), _col2 (type: int), _col3 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 241048211 Data size: 25752811344 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + input vertices: + 1 Reducer 13 + Statistics: Num rows: 241048211 Data size: 23824425656 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (CAST( _col1 AS decimal(10,0)) * _col2) (type: decimal(18,2)) + outputColumnNames: _col0 + Statistics: Num rows: 241048211 Data size: 26997399632 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(28,2)) + Reducer 7 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -467,7 +598,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 7 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -479,67 +610,71 @@ STAGE PLANS: Filter Operator predicate: _col1 is not null (type: boolean) Statistics: Num rows: 78525966 Data size: 9394833968 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Reducer 10 - Statistics: Num rows: 78525966 Data size: 18189742160 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (_col1 > _col2) (type: boolean) - Statistics: Num rows: 26175322 Data size: 6063247392 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 26175322 Data size: 199975264 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 26175322 Data size: 199975264 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 26175322 Data size: 199975264 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 26175322 Data size: 199975264 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 78525966 Data size: 9394833968 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(28,2)) Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: bigint) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 63129535 Data size: 7560736760 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col1 (type: decimal(28,2)) - outputColumnNames: _col1 - Statistics: Num rows: 63129535 Data size: 7560736760 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: max(_col1) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: decimal(28,2)) - Union 2 - Vertex: Union 2 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col2, _col3, _col7 + input vertices: + 0 Map 16 + Statistics: Num rows: 366561385 Data size: 86194887208 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col2, _col3, _col7, _col8 + input vertices: + 1 Reducer 12 + Statistics: Num rows: 366561385 Data size: 127249762328 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (_col7 > _col8) (type: boolean) + Statistics: Num rows: 122187128 Data size: 42416587440 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint), _col2 (type: int), _col3 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 122187128 Data size: 15046670768 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + input vertices: + 1 Reducer 13 + Statistics: Num rows: 122187128 Data size: 14069173744 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: (CAST( _col1 AS decimal(10,0)) * _col2) (type: decimal(18,2)) + outputColumnNames: _col0 + Statistics: Num rows: 122187128 Data size: 13684958336 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: decimal(28,2)) + Union 6 + Vertex: Union 6 Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query33.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query33.q.out index bec4c53aab72..0a6d5314ee0d 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query33.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query33.q.out @@ -7,17 +7,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) - Map 11 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) - Map 14 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE) - Reducer 10 <- Map 7 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 15 <- Map 14 (SIMPLE_EDGE), Union 3 (CONTAINS) + Map 1 <- Map 11 (BROADCAST_EDGE), Map 12 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Map 12 <- Reducer 8 (BROADCAST_EDGE) + Map 13 <- Map 11 (BROADCAST_EDGE), Map 12 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Map 9 <- Map 11 (BROADCAST_EDGE), Map 12 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 14 <- Map 13 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -25,7 +24,7 @@ STAGE PLANS: TableScan alias: store_sales filterExpr: ss_addr_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_271_container, bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:1.585245486373433E-8 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_274_container, bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:1.585245486373433E-8 Statistics: Num rows: 82510879939 Data size: 10987909046272 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ss_addr_sk is not null (type: boolean) @@ -52,7 +51,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col2 input vertices: - 1 Map 13 + 1 Map 11 Statistics: Num rows: 227952808 Data size: 1823622576 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -60,107 +59,27 @@ STAGE PLANS: keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col10 + outputColumnNames: _col2, _col7 input vertices: - 1 Reducer 9 - Statistics: Num rows: 227384408 Data size: 909533152 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col10 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 227384408 Data size: 909533152 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 3952 Data size: 458432 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 3952 Data size: 458432 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + 1 Map 12 + Statistics: Num rows: 487484 Data size: 1945456 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 988 Data size: 114604 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 988 Data size: 114604 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 11 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: cs_bill_addr_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_275_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:0.0027954892505937553 - Statistics: Num rows: 43005109025 Data size: 5835793041376 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cs_bill_addr_sk is not null (type: boolean) - Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cs_bill_addr_sk (type: bigint), cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 6 - Statistics: Num rows: 723125004 Data size: 79690279120 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 13 - Statistics: Num rows: 120520838 Data size: 2445693184 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col2, _col10 - input vertices: - 1 Reducer 8 - Statistics: Num rows: 120220320 Data size: 1928745152 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col10 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 120220320 Data size: 1928745152 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 7904 Data size: 916864 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 7904 Data size: 916864 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 13 Map Operator Tree: TableScan alias: customer_address @@ -193,12 +112,58 @@ STAGE PLANS: Statistics: Num rows: 6666667 Data size: 53333336 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 14 + Map 12 + Map Operator Tree: + TableScan + alias: item + filterExpr: i_manufact_id is not null (type: boolean) + Statistics: Num rows: 462000 Data size: 5539396 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: i_manufact_id is not null (type: boolean) + Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_manufact_id (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 8 + Statistics: Num rows: 461314 Data size: 5531176 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 461314 Data size: 5531176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 461314 Data size: 5531176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 461314 Data size: 5531176 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 13 Map Operator Tree: TableScan alias: web_sales filterExpr: ws_bill_addr_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_277_container, bigKeyColName:ws_bill_addr_sk, smallTablePos:1, keyRatio:6.057059039311133E-8 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_280_container, bigKeyColName:ws_bill_addr_sk, smallTablePos:1, keyRatio:6.057059039311133E-8 Statistics: Num rows: 21594638446 Data size: 2936546611376 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ws_bill_addr_sk is not null (type: boolean) @@ -225,7 +190,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col2 input vertices: - 1 Map 13 + 1 Map 11 Statistics: Num rows: 61093544 Data size: 7028655600 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -233,34 +198,24 @@ STAGE PLANS: keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col10 + outputColumnNames: _col2, _col7 input vertices: - 1 Reducer 10 - Statistics: Num rows: 60941208 Data size: 6766605856 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col10 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col2, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 60941208 Data size: 6766605856 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: int) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 26676 Data size: 3094416 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 26676 Data size: 3094416 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + 1 Map 12 + Statistics: Num rows: 130651 Data size: 518124 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 281 Data size: 32592 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 281 Data size: 32592 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 6 @@ -319,7 +274,7 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 14 + Target Vertex: Map 13 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -341,24 +296,24 @@ STAGE PLANS: Target Input: catalog_sales Partition key expr: cs_sold_date_sk Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 11 + Target Vertex: Map 9 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 7 Map Operator Tree: TableScan alias: item - filterExpr: (((i_category = 'Books ') and i_manufact_id is not null) or i_manufact_id is not null) (type: boolean) + filterExpr: ((i_category = 'Books ') and i_manufact_id is not null) (type: boolean) Statistics: Num rows: 462000 Data size: 43423396 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((i_category = 'Books ') and i_manufact_id is not null) (type: boolean) Statistics: Num rows: 41895 Data size: 3937718 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i_manufact_id (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 41895 Data size: 167168 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: i_manufact_id + Statistics: Num rows: 41895 Data size: 3937718 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: int) + keys: i_manufact_id (type: int) minReductionHashAggr: 0.97641724 mode: hash outputColumnNames: _col0 @@ -369,62 +324,69 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 988 Data size: 3948 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 988 Data size: 3948 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 988 Data size: 3948 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 9 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: cs_bill_addr_sk is not null (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_283_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:5.9931948980799036E-6 + Statistics: Num rows: 43005109025 Data size: 5835793041376 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator - predicate: i_manufact_id is not null (type: boolean) - Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE + predicate: cs_bill_addr_sk is not null (type: boolean) + Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: i_item_sk (type: bigint), i_manufact_id (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + expressions: cs_bill_addr_sk (type: bigint), cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 6 + Statistics: Num rows: 723125004 Data size: 79690279120 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 11 + Statistics: Num rows: 120520838 Data size: 2445693184 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col2, _col7 + input vertices: + 1 Map 12 + Statistics: Num rows: 257738 Data size: 1026472 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 553 Data size: 64144 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 553 Data size: 64144 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 10 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -432,22 +394,22 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 553 Data size: 64144 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) - minReductionHashAggr: 0.66677916 + minReductionHashAggr: 0.45773876 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114480 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(27,2)) - Reducer 15 + Reducer 14 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -455,20 +417,20 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 988 Data size: 114608 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 281 Data size: 32592 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) - minReductionHashAggr: 0.66677916 + minReductionHashAggr: 0.45773876 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114480 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(27,2)) Reducer 2 Execution mode: vectorized, llap @@ -478,20 +440,20 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 988 Data size: 114604 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1) keys: _col0 (type: int) - minReductionHashAggr: 0.66677916 + minReductionHashAggr: 0.45773876 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114480 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114480 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(27,2)) Reducer 4 Execution mode: vectorized, llap @@ -501,18 +463,18 @@ STAGE PLANS: keys: KEY._col0 (type: int) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114480 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: + keys: _col1 (type: decimal(27,2)) null sort order: z - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114480 Basic stats: COMPLETE Column stats: COMPLETE top n: 100 Reduce Output Operator key expressions: _col1 (type: decimal(27,2)) null sort order: z sort order: + - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114480 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reducer 5 Execution mode: vectorized, llap @@ -520,7 +482,7 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: decimal(27,2)) outputColumnNames: _col0, _col1 - Statistics: Num rows: 987 Data size: 114492 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 987 Data size: 114464 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 Statistics: Num rows: 100 Data size: 11600 Basic stats: COMPLETE Column stats: COMPLETE @@ -534,29 +496,17 @@ STAGE PLANS: Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int) - outputColumnNames: _col0, _col1 + Group By Operator + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 988 Data size: 3948 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col0 (type: int) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 460848 Data size: 5525584 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: int) + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 988 Data size: 3948 Basic stats: COMPLETE Column stats: COMPLETE Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query56.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query56.q.out index 120408d701ca..fecaaf7a6ebd 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query56.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query56.q.out @@ -7,17 +7,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) - Map 11 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) - Map 14 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE) - Reducer 10 <- Map 7 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 15 <- Map 14 (SIMPLE_EDGE), Union 3 (CONTAINS) + Map 1 <- Map 11 (BROADCAST_EDGE), Map 12 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Map 12 <- Reducer 8 (BROADCAST_EDGE) + Map 13 <- Map 11 (BROADCAST_EDGE), Map 12 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Map 9 <- Map 11 (BROADCAST_EDGE), Map 12 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 14 <- Map 13 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -25,7 +24,7 @@ STAGE PLANS: TableScan alias: store_sales filterExpr: ss_addr_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_268_container, bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:1.585245486373433E-8 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_271_container, bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:1.585245486373433E-8 Statistics: Num rows: 82510879939 Data size: 10987909046272 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ss_addr_sk is not null (type: boolean) @@ -52,7 +51,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col2 input vertices: - 1 Map 13 + 1 Map 11 Statistics: Num rows: 227952808 Data size: 1823622576 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -60,107 +59,27 @@ STAGE PLANS: keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col10 + outputColumnNames: _col2, _col7 input vertices: - 1 Reducer 9 - Statistics: Num rows: 227952808 Data size: 22795280912 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col10 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 13435479 Data size: 1343548012 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 5160 Data size: 1093920 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 5160 Data size: 1093920 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 11 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: cs_bill_addr_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_271_container, bigKeyColName:cs_bill_addr_sk, smallTablePos:1, keyRatio:0.002802477211020162 - Statistics: Num rows: 43005109025 Data size: 5835793041376 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cs_bill_addr_sk is not null (type: boolean) - Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cs_bill_addr_sk (type: bigint), cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 6 - Statistics: Num rows: 723125004 Data size: 79690279120 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 13 - Statistics: Num rows: 120520838 Data size: 2445693184 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col2, _col10 - input vertices: - 1 Reducer 8 - Statistics: Num rows: 120520838 Data size: 13533610280 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col10 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 7103466 Data size: 710346712 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 + 1 Map 12 + Statistics: Num rows: 7198277 Data size: 719827812 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2580 Data size: 546960 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 2580 Data size: 546960 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 2580 Data size: 546960 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 13 + Map 11 Map Operator Tree: TableScan alias: customer_address @@ -193,12 +112,54 @@ STAGE PLANS: Statistics: Num rows: 6666667 Data size: 53333336 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 14 + Map 12 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 8 + Statistics: Num rows: 27230 Data size: 2940840 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 27230 Data size: 2940840 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 27230 Data size: 2940840 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 27230 Data size: 2940840 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 13 Map Operator Tree: TableScan alias: web_sales filterExpr: ws_bill_addr_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_274_container, bigKeyColName:ws_bill_addr_sk, smallTablePos:1, keyRatio:6.057059039311133E-8 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_277_container, bigKeyColName:ws_bill_addr_sk, smallTablePos:1, keyRatio:6.057059039311133E-8 Statistics: Num rows: 21594638446 Data size: 2936546611376 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ws_bill_addr_sk is not null (type: boolean) @@ -225,7 +186,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col2 input vertices: - 1 Map 13 + 1 Map 11 Statistics: Num rows: 61093544 Data size: 7028655600 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -233,34 +194,24 @@ STAGE PLANS: keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col10 + outputColumnNames: _col2, _col7 input vertices: - 1 Reducer 10 - Statistics: Num rows: 61093544 Data size: 12649261648 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col10 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 3600837 Data size: 460807764 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1720 Data size: 364640 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1720 Data size: 364640 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + 1 Map 12 + Statistics: Num rows: 1929208 Data size: 192920912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 860 Data size: 182320 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 860 Data size: 182320 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 6 @@ -319,7 +270,7 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 14 + Target Vertex: Map 13 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -341,23 +292,24 @@ STAGE PLANS: Target Input: catalog_sales Partition key expr: cs_sold_date_sk Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 11 + Target Vertex: Map 9 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 7 Map Operator Tree: TableScan alias: item + filterExpr: (i_color) IN ('chiffon ', 'lace ', 'orchid ') (type: boolean) Statistics: Num rows: 462000 Data size: 87318000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (i_color) IN ('chiffon ', 'lace ', 'orchid ') (type: boolean) Statistics: Num rows: 14589 Data size: 2757321 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i_item_id (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 14589 Data size: 1458900 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: i_item_id + Statistics: Num rows: 14589 Data size: 2757321 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string) + keys: i_item_id (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 @@ -368,59 +320,69 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 14589 Data size: 1458900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14589 Data size: 1458900 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 14589 Data size: 1458900 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Reducer 10 + Map 9 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: cs_bill_addr_sk is not null (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_280_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:8.849641557210306E-5 + Statistics: Num rows: 43005109025 Data size: 5835793041376 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cs_bill_addr_sk is not null (type: boolean) + Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cs_bill_addr_sk (type: bigint), cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 6 + Statistics: Num rows: 723125004 Data size: 79690279120 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 11 + Statistics: Num rows: 120520838 Data size: 2445693184 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col2, _col7 + input vertices: + 1 Map 12 + Statistics: Num rows: 3805798 Data size: 380579912 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1720 Data size: 364640 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1720 Data size: 364640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 12 + LLAP IO: may be used (ACID table) + Reducer 10 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -443,7 +405,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 860 Data size: 182320 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(27,2)) - Reducer 15 + Reducer 14 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -530,29 +492,17 @@ STAGE PLANS: Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 14589 Data size: 1458900 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 14589 Data size: 1458900 Basic stats: COMPLETE Column stats: COMPLETE Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out index 0091d3885c34..f4f3f3e2b59c 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query58.q.out @@ -1,4 +1,4 @@ -Warning: Map Join MAPJOIN[375][bigTable=?] in task 'Map 5' is a cross product +Warning: Map Join MAPJOIN[396][bigTable=?] in task 'Map 11' is a cross product STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -8,105 +8,19 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Map 5 <- Map 3 (BROADCAST_EDGE), Reducer 4 (BROADCAST_EDGE) - Map 6 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Map 9 <- Map 11 (BROADCAST_EDGE), Map 3 (BROADCAST_EDGE), Map 5 (BROADCAST_EDGE) - Reducer 10 <- Map 9 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE) - Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 10 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 8 <- Reducer 7 (SIMPLE_EDGE) + Map 11 <- Map 1 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) + Reducer 10 <- Reducer 9 (SIMPLE_EDGE) + Reducer 12 <- Map 1 (BROADCAST_EDGE), Map 11 (SIMPLE_EDGE) + Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Reducer 12 (CUSTOM_SIMPLE_EDGE) + Reducer 15 <- Reducer 14 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 3 (CUSTOM_SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Reducer 12 (CUSTOM_SIMPLE_EDGE) + Reducer 5 <- Reducer 4 (SIMPLE_EDGE) + Reducer 8 <- Map 6 (BROADCAST_EDGE), Map 7 (CUSTOM_SIMPLE_EDGE), Reducer 12 (CUSTOM_SIMPLE_EDGE) + Reducer 9 <- Reducer 15 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE), Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 - Map Operator Tree: - TableScan - alias: catalog_sales - Statistics: Num rows: 43005109025 Data size: 5492607208208 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 43005109025 Data size: 5492607208208 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 3 - Statistics: Num rows: 43005109025 Data size: 7556852441408 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 43005109025 Data size: 5148566336008 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 11 - Statistics: Num rows: 43005109025 Data size: 9105036366308 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 8803686108 Data size: 1866381454896 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 8803686108 Data size: 1866381454896 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 11 - Map Operator Tree: - TableScan - alias: item - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 3 Map Operator Tree: TableScan alias: date_dim @@ -120,74 +34,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: cs_sold_date_sk (bigint) - Target Input: catalog_sales - Partition key expr: cs_sold_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ss_sold_date_sk (bigint) - Target Input: store_sales - Partition key expr: ss_sold_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 6 - Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col1 (type: date) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Map-reduce partition columns: _col1 (type: date) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: ws_sold_date_sk (bigint) - Target Input: web_sales - Partition key expr: ws_sold_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 9 + value expressions: _col0 (type: bigint) Filter Operator predicate: ((d_date = DATE'1998-02-19') and d_week_seq is not null) (type: boolean) Statistics: Num rows: 36524 Data size: 2191440 Basic stats: COMPLETE Column stats: COMPLETE @@ -219,7 +71,7 @@ STAGE PLANS: value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 + Map 11 Map Operator Tree: TableScan alias: date_dim @@ -240,7 +92,7 @@ STAGE PLANS: 1 outputColumnNames: _col0, _col1 input vertices: - 1 Reducer 4 + 1 Reducer 2 Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -250,7 +102,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 3 + 1 Map 1 Statistics: Num rows: 236172 Data size: 13225632 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) @@ -264,131 +116,234 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: date) Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 6 + Map 13 Map Operator Tree: TableScan - alias: store_sales - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + alias: web_sales + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) + expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 3 - Statistics: Num rows: 82510879939 Data size: 14303918963024 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 82510879939 Data size: 9683309686440 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 11 - Statistics: Num rows: 82510879939 Data size: 17274310640828 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 16702424472 Data size: 3540913988064 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 16702424472 Data size: 3540913988064 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: decimal(7,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 9 + Map 3 Map Operator Tree: TableScan - alias: web_sales - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE + alias: catalog_sales + Statistics: Num rows: 43005109025 Data size: 5492607208208 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: ws_item_sk (type: bigint), ws_ext_sales_price (type: decimal(7,2)), ws_sold_date_sk (type: bigint) + expressions: cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 21594638446 Data size: 2763811113552 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 3 - Statistics: Num rows: 21594638446 Data size: 3800353758960 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 5 - Statistics: Num rows: 21594638446 Data size: 2591054005984 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 11 - Statistics: Num rows: 21594638446 Data size: 4577760743016 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 4426224168 Data size: 938359523616 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 4426224168 Data size: 938359523616 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + Statistics: Num rows: 43005109025 Data size: 5492607208208 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 43005109025 Data size: 5492607208208 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: decimal(7,2)) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 6 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 7 + Map Operator Tree: + TableScan + alias: store_sales + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ss_item_sk (type: bigint), ss_ext_sales_price (type: decimal(7,2)), ss_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 82510879939 Data size: 10343396725952 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: decimal(7,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Reducer 10 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 100 + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 12 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0 + input vertices: + 0 Map 1 + Statistics: Num rows: 2668041676 Data size: 21344333408 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 2668041676 Data size: 21344333408 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2668041676 Data size: 21344333408 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5699400 Data size: 45595200 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cs_sold_date_sk (bigint) + Target Input: catalog_sales + Partition key expr: cs_sold_date_sk + Statistics: Num rows: 5699400 Data size: 45595200 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 3 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 2668041676 Data size: 21344333408 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2668041676 Data size: 21344333408 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5699400 Data size: 45595200 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ss_sold_date_sk (bigint) + Target Input: store_sales + Partition key expr: ss_sold_date_sk + Statistics: Num rows: 5699400 Data size: 45595200 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 7 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 2668041676 Data size: 21344333408 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 2668041676 Data size: 21344333408 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 5699400 Data size: 45595200 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: ws_sold_date_sk (bigint) + Target Input: web_sales + Partition key expr: ws_sold_date_sk + Statistics: Num rows: 5699400 Data size: 45595200 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 13 + Reducer 14 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col3, _col4 + input vertices: + 0 Reducer 12 + Statistics: Num rows: 1669782601 Data size: 200071304584 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col4, _col7 + input vertices: + 1 Map 6 + Statistics: Num rows: 1669782601 Data size: 353691303876 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col4) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 342078168 Data size: 72520571616 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 342078168 Data size: 72520571616 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Reducer 15 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -409,6 +364,61 @@ STAGE PLANS: Statistics: Num rows: 247524 Data size: 107920464 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)), _col2 (type: decimal(19,3)), _col3 (type: decimal(20,3)) Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: sq_count_check(_col0) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col3, _col4 + input vertices: + 0 Reducer 12 + Statistics: Num rows: 3325324616 Data size: 386992206928 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col4, _col7 + input vertices: + 1 Map 6 + Statistics: Num rows: 3325324616 Data size: 692922071600 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col4) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 670047468 Data size: 142050063216 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 670047468 Data size: 142050063216 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Reducer 5 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -424,24 +434,45 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 247524 Data size: 52475088 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(17,2)) - Reducer 4 + Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: - Group By Operator - aggregations: count(VALUE._col0) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: sq_count_check(_col0) (type: boolean) - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col3, _col4 + input vertices: + 0 Reducer 12 + Statistics: Num rows: 6380066609 Data size: 547612086840 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col4, _col7 + input vertices: + 1 Map 6 + Statistics: Num rows: 6380066609 Data size: 1134578214868 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col4) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1097026368 Data size: 232569590016 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - null sort order: - sort order: - Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE - Reducer 7 + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 1097026368 Data size: 232569590016 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) + Reducer 9 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -458,7 +489,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Reducer 2 + 0 Reducer 5 Statistics: Num rows: 247524 Data size: 80197776 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col1 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col3 BETWEEN (0.9 * _col1) AND (1.1 * _col1)) (type: boolean) @@ -471,7 +502,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col5, _col6, _col7 input vertices: - 1 Reducer 10 + 1 Reducer 15 Statistics: Num rows: 3055 Data size: 2016300 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (_col5 BETWEEN (0.9 * _col1) AND (1.1 * _col1) and _col5 BETWEEN (0.9 * _col3) AND (1.1 * _col3) and _col1 BETWEEN _col6 AND _col7 and _col3 BETWEEN _col6 AND _col7) (type: boolean) @@ -492,23 +523,6 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(38,17)), _col3 (type: decimal(17,2)), _col4 (type: decimal(38,17)), _col5 (type: decimal(17,2)), _col6 (type: decimal(38,17)), _col7 (type: decimal(23,6)) - Reducer 8 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: decimal(17,2)), VALUE._col0 (type: decimal(38,17)), VALUE._col1 (type: decimal(17,2)), VALUE._col2 (type: decimal(38,17)), VALUE._col3 (type: decimal(17,2)), VALUE._col4 (type: decimal(38,17)), VALUE._col5 (type: decimal(23,6)) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - Limit - Number of rows: 100 - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 1 Data size: 884 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query60.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query60.q.out index b0e5ba681a36..4cb67964ad7e 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query60.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query60.q.out @@ -7,17 +7,16 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) - Map 11 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) - Map 14 <- Map 13 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 7 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE) - Reducer 10 <- Map 7 (SIMPLE_EDGE) - Reducer 12 <- Map 11 (SIMPLE_EDGE), Union 3 (CONTAINS) - Reducer 15 <- Map 14 (SIMPLE_EDGE), Union 3 (CONTAINS) + Map 1 <- Map 11 (BROADCAST_EDGE), Map 12 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Map 12 <- Reducer 8 (BROADCAST_EDGE) + Map 13 <- Map 11 (BROADCAST_EDGE), Map 12 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Map 9 <- Map 11 (BROADCAST_EDGE), Map 12 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) + Reducer 10 <- Map 9 (SIMPLE_EDGE), Union 3 (CONTAINS) + Reducer 14 <- Map 13 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 2 <- Map 1 (SIMPLE_EDGE), Union 3 (CONTAINS) Reducer 4 <- Union 3 (SIMPLE_EDGE) Reducer 5 <- Reducer 4 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Map 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -25,7 +24,7 @@ STAGE PLANS: TableScan alias: store_sales filterExpr: ss_addr_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_273_container, bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:1.585245486373433E-8 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_276_container, bigKeyColName:ss_addr_sk, smallTablePos:1, keyRatio:1.585245486373433E-8 Statistics: Num rows: 82510879939 Data size: 10987909046272 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ss_addr_sk is not null (type: boolean) @@ -52,7 +51,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col2 input vertices: - 1 Map 13 + 1 Map 11 Statistics: Num rows: 227952808 Data size: 1823622576 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -60,107 +59,27 @@ STAGE PLANS: keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col10 + outputColumnNames: _col2, _col7 input vertices: - 1 Reducer 9 - Statistics: Num rows: 227952808 Data size: 22795280912 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col10 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 38679150 Data size: 3867915112 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 114032 Data size: 24174784 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 114032 Data size: 24174784 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + 1 Map 12 + Statistics: Num rows: 20722984 Data size: 2072298512 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 64143 Data size: 13598316 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 64143 Data size: 13598316 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 11 - Map Operator Tree: - TableScan - alias: catalog_sales - filterExpr: cs_bill_addr_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_276_container, bigKeyColName:cs_bill_addr_sk, smallTablePos:1, keyRatio:0.002802477211020162 - Statistics: Num rows: 43005109025 Data size: 5835793041376 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: cs_bill_addr_sk is not null (type: boolean) - Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cs_bill_addr_sk (type: bigint), cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col3 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col2 - input vertices: - 1 Map 6 - Statistics: Num rows: 723125004 Data size: 79690279120 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col2 - input vertices: - 1 Map 13 - Statistics: Num rows: 120520838 Data size: 2445693184 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col1 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col2, _col10 - input vertices: - 1 Reducer 8 - Statistics: Num rows: 120520838 Data size: 13533610280 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col10 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 20450037 Data size: 2045003812 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57016 Data size: 12087392 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 57016 Data size: 12087392 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 13 Map Operator Tree: TableScan alias: customer_address @@ -193,12 +112,54 @@ STAGE PLANS: Statistics: Num rows: 6666667 Data size: 53333336 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 14 + Map 12 + Map Operator Tree: + TableScan + alias: item + Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: i_item_sk (type: bigint), i_item_id (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1 + input vertices: + 1 Reducer 8 + Statistics: Num rows: 78392 Data size: 8466336 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 78392 Data size: 8466336 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 78392 Data size: 8466336 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 78392 Data size: 8466336 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 13 Map Operator Tree: TableScan alias: web_sales filterExpr: ws_bill_addr_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_279_container, bigKeyColName:ws_bill_addr_sk, smallTablePos:1, keyRatio:6.057059039311133E-8 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_282_container, bigKeyColName:ws_bill_addr_sk, smallTablePos:1, keyRatio:6.057059039311133E-8 Statistics: Num rows: 21594638446 Data size: 2936546611376 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ws_bill_addr_sk is not null (type: boolean) @@ -225,7 +186,7 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col0, _col2 input vertices: - 1 Map 13 + 1 Map 11 Statistics: Num rows: 61093544 Data size: 7028655600 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -233,34 +194,24 @@ STAGE PLANS: keys: 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col2, _col10 + outputColumnNames: _col2, _col7 input vertices: - 1 Reducer 10 - Statistics: Num rows: 61093544 Data size: 12649261648 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col10 (type: string) - 1 _col0 (type: string) - outputColumnNames: _col2, _col10 - input vertices: - 1 Map 7 - Statistics: Num rows: 10366384 Data size: 1895103728 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col10 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 57016 Data size: 12087392 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 57016 Data size: 12087392 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: decimal(17,2)) + 1 Map 12 + Statistics: Num rows: 5553959 Data size: 874869628 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 28508 Data size: 6043696 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 28508 Data size: 6043696 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 6 @@ -319,7 +270,7 @@ STAGE PLANS: Target Input: web_sales Partition key expr: ws_sold_date_sk Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 14 + Target Vertex: Map 13 Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -341,23 +292,24 @@ STAGE PLANS: Target Input: catalog_sales Partition key expr: cs_sold_date_sk Statistics: Num rows: 31 Data size: 248 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 11 + Target Vertex: Map 9 Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 7 Map Operator Tree: TableScan alias: item + filterExpr: (i_category = 'Children ') (type: boolean) Statistics: Num rows: 462000 Data size: 87780000 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (i_category = 'Children ') (type: boolean) Statistics: Num rows: 42000 Data size: 7980000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: i_item_id (type: string) - outputColumnNames: _col0 - Statistics: Num rows: 42000 Data size: 4200000 Basic stats: COMPLETE Column stats: COMPLETE + outputColumnNames: i_item_id + Statistics: Num rows: 42000 Data size: 7980000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - keys: _col0 (type: string) + keys: i_item_id (type: string) minReductionHashAggr: 0.4 mode: hash outputColumnNames: _col0 @@ -368,59 +320,69 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 42000 Data size: 4200000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 42000 Data size: 4200000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 42000 Data size: 4200000 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: i_item_sk (type: bigint), i_item_id (type: string) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Reducer 10 + Map 9 + Map Operator Tree: + TableScan + alias: catalog_sales + filterExpr: cs_bill_addr_sk is not null (type: boolean) + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_285_container, bigKeyColName:cs_item_sk, smallTablePos:1, keyRatio:2.547706830281661E-4 + Statistics: Num rows: 43005109025 Data size: 5835793041376 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: cs_bill_addr_sk is not null (type: boolean) + Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cs_bill_addr_sk (type: bigint), cs_item_sk (type: bigint), cs_ext_sales_price (type: decimal(7,2)), cs_sold_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 42898229145 Data size: 5821289442328 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 6 + Statistics: Num rows: 723125004 Data size: 79690279120 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col1, _col2 + input vertices: + 1 Map 11 + Statistics: Num rows: 120520838 Data size: 2445693184 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col2, _col7 + input vertices: + 1 Map 12 + Statistics: Num rows: 10956441 Data size: 1095644212 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 35635 Data size: 7554620 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35635 Data size: 7554620 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(17,2)) Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 12 + LLAP IO: may be used (ACID table) + Reducer 10 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -449,7 +411,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 7127 Data size: 1510924 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(27,2)) - Reducer 15 + Reducer 14 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -547,29 +509,17 @@ STAGE PLANS: Reducer 8 Execution mode: vectorized, llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) - Reducer 9 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: string) - outputColumnNames: _col0, _col1 + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 42000 Data size: 4200000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col0 (type: string) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 462000 Data size: 49896000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: string) + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 42000 Data size: 4200000 Basic stats: COMPLETE Column stats: COMPLETE Union 3 Vertex: Union 3 diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query70.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query70.q.out index f417133c1fa9..04b71c367587 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query70.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query70.q.out @@ -7,13 +7,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) Map 7 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Reducer 4 <- Reducer 3 (SIMPLE_EDGE) Reducer 8 <- Map 7 (SIMPLE_EDGE) - Reducer 9 <- Reducer 8 (SIMPLE_EDGE) + Reducer 9 <- Map 6 (BROADCAST_EDGE), Reducer 8 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -21,7 +21,7 @@ STAGE PLANS: TableScan alias: store_sales filterExpr: ss_store_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_120_container, bigKeyColName:ss_store_sk, smallTablePos:1, keyRatio:0.1919754858718087 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_123_container, bigKeyColName:ss_store_sk, smallTablePos:1, keyRatio:0.1919754858718087 Statistics: Num rows: 82510879939 Data size: 10327900046896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ss_store_sk is not null (type: boolean) @@ -48,37 +48,27 @@ STAGE PLANS: 1 _col0 (type: bigint) outputColumnNames: _col1, _col6, _col7 input vertices: - 1 Map 6 + 1 Reducer 9 Statistics: Num rows: 15840066266 Data size: 4475829148384 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col7 (type: char(2)) - 1 _col0 (type: char(2)) - outputColumnNames: _col1, _col6, _col7 - input vertices: - 1 Reducer 9 - Statistics: Num rows: 5040021084 Data size: 1279015774512 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col7 (type: char(2)), _col6 (type: varchar(30)), _col1 (type: decimal(7,2)) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 5040021084 Data size: 1279015774512 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col2) - keys: _col0 (type: char(2)), _col1 (type: varchar(30)), 0L (type: bigint) - grouping sets: 0, 1, 3 - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 3073155 Data size: 934239120 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: char(2)), _col1 (type: varchar(30)), _col2 (type: bigint) - null sort order: zzz - sort order: +++ - Map-reduce partition columns: _col0 (type: char(2)), _col1 (type: varchar(30)), _col2 (type: bigint) - Statistics: Num rows: 3073155 Data size: 934239120 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col3 (type: decimal(17,2)) + Select Operator + expressions: _col7 (type: char(2)), _col6 (type: varchar(30)), _col1 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 15840066266 Data size: 4475829148384 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col2) + keys: _col0 (type: char(2)), _col1 (type: varchar(30)), 0L (type: bigint) + grouping sets: 0, 1, 3 + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 10752660 Data size: 3268808640 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: char(2)), _col1 (type: varchar(30)), _col2 (type: bigint) + null sort order: zzz + sort order: +++ + Map-reduce partition columns: _col0 (type: char(2)), _col1 (type: varchar(30)), _col2 (type: bigint) + Statistics: Num rows: 10752660 Data size: 3268808640 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col3 (type: decimal(17,2)) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 5 @@ -154,12 +144,12 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1704 Data size: 327168 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col2 (type: char(2)) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Map-reduce partition columns: _col2 (type: char(2)) Statistics: Num rows: 1704 Data size: 327168 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: varchar(30)), _col2 (type: char(2)) + value expressions: _col0 (type: bigint), _col1 (type: varchar(30)) Select Operator expressions: s_store_sk (type: bigint), s_state (type: char(2)) outputColumnNames: _col0, _col1 @@ -178,7 +168,7 @@ STAGE PLANS: TableScan alias: store_sales filterExpr: ss_store_sk is not null (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_122_container, bigKeyColName:ss_store_sk, smallTablePos:1, keyRatio:0.1919754858718087 + probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_121_container, bigKeyColName:ss_store_sk, smallTablePos:1, keyRatio:0.1919754858718087 Statistics: Num rows: 82510879939 Data size: 10327900046896 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ss_store_sk is not null (type: boolean) @@ -361,18 +351,23 @@ STAGE PLANS: expressions: _col0 (type: char(2)) outputColumnNames: _col0 Statistics: Num rows: 14 Data size: 1204 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: char(2)) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 14 Data size: 1204 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col2 (type: char(2)) + 1 _col0 (type: char(2)) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 0 Map 6 + Statistics: Num rows: 542 Data size: 104064 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col0 (type: char(2)) + key expressions: _col0 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: char(2)) - Statistics: Num rows: 14 Data size: 1204 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 542 Data size: 104064 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: varchar(30)), _col2 (type: char(2)) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out index 39b248473790..a71bc0350de4 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query83.q.out @@ -7,75 +7,76 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE) - Map 12 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE) - Map 3 <- Map 8 (BROADCAST_EDGE) - Map 5 <- Map 4 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 10 (BROADCAST_EDGE) - Map 8 <- Map 11 (BROADCAST_EDGE) - Reducer 10 <- Map 8 (SIMPLE_EDGE) - Reducer 13 <- Map 12 (SIMPLE_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) - Reducer 6 <- Map 5 (SIMPLE_EDGE), Reducer 13 (BROADCAST_EDGE), Reducer 2 (BROADCAST_EDGE) - Reducer 7 <- Reducer 6 (SIMPLE_EDGE) - Reducer 9 <- Map 8 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Map 3 <- Map 14 (BROADCAST_EDGE) + Reducer 11 <- Map 10 (CUSTOM_SIMPLE_EDGE), Map 9 (BROADCAST_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 12 <- Reducer 11 (SIMPLE_EDGE), Reducer 17 (BROADCAST_EDGE), Reducer 8 (BROADCAST_EDGE) + Reducer 13 <- Reducer 12 (SIMPLE_EDGE) + Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE), Map 9 (BROADCAST_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 17 <- Reducer 16 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) + Reducer 4 <- Map 3 (SIMPLE_EDGE), Reducer 5 (BROADCAST_EDGE) + Reducer 5 <- Map 3 (SIMPLE_EDGE) + Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE), Map 9 (BROADCAST_EDGE), Reducer 2 (CUSTOM_SIMPLE_EDGE) + Reducer 8 <- Reducer 7 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: catalog_returns - Statistics: Num rows: 4320980099 Data size: 86073249960 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: cr_item_sk (type: bigint), cr_return_quantity (type: int), cr_returned_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 4320980099 Data size: 86073249960 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Map 8 - Statistics: Num rows: 4320980099 Data size: 293480294712 Basic stats: COMPLETE Column stats: COMPLETE + alias: date_dim + filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: (d_week_seq is not null and d_date is not null) (type: boolean) + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date (type: date), d_week_seq (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Semi Join 0 to 1 keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 input vertices: 1 Map 3 - Statistics: Num rows: 4320980099 Data size: 51505409168 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 4 - Statistics: Num rows: 4320980099 Data size: 449035578276 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 434404620 Data size: 46915698960 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 434404620 Data size: 46915698960 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: date) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: date) + Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 10 + Map Operator Tree: + TableScan + alias: store_returns + Statistics: Num rows: 8332595709 Data size: 166044313360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: sr_item_sk (type: bigint), sr_return_quantity (type: int), sr_returned_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 8332595709 Data size: 166044313360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 8332595709 Data size: 166044313360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 11 + Map 14 Map Operator Tree: TableScan alias: date_dim @@ -102,7 +103,7 @@ STAGE PLANS: Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 12 + Map 15 Map Operator Tree: TableScan alias: web_returns @@ -111,59 +112,40 @@ STAGE PLANS: expressions: wr_item_sk (type: bigint), wr_return_quantity (type: int), wr_returned_date_sk (type: bigint) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 2062802370 Data size: 41061626908 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Reducer 9 - Statistics: Num rows: 2062802370 Data size: 140076140668 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 8 - Statistics: Num rows: 2062802370 Data size: 24559207948 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 4 - Statistics: Num rows: 2062802370 Data size: 214337025988 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 207425112 Data size: 22401912096 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 207425112 Data size: 22401912096 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) + Reduce Output Operator + key expressions: _col2 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 2062802370 Data size: 41061626908 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: int) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 3 Map Operator Tree: TableScan alias: date_dim - filterExpr: (d_week_seq is not null and d_date is not null) (type: boolean) - probeDecodeDetails: cacheKey:HASH_MAP_MAPJOIN_322_container, bigKeyColName:d_week_seq, smallTablePos:1, keyRatio:0.0 + filterExpr: (((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) or (d_week_seq is not null and d_date is not null) or d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) + Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_week_seq (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: int) + minReductionHashAggr: 0.690705 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (d_week_seq is not null and d_date is not null) (type: boolean) Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE @@ -179,7 +161,7 @@ STAGE PLANS: 1 _col0 (type: int) outputColumnNames: _col0 input vertices: - 1 Map 8 + 1 Map 14 Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator keys: _col0 (type: date) @@ -193,9 +175,48 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: date) Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: d_date is not null (type: boolean) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: d_date_sk (type: bigint), d_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: date) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) + Reduce Output Operator + key expressions: _col1 (type: date) + null sort order: z + sort order: + + Map-reduce partition columns: _col1 (type: date) + Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 4 + Map 6 + Map Operator Tree: + TableScan + alias: catalog_returns + Statistics: Num rows: 4320980099 Data size: 86073249960 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: cr_item_sk (type: bigint), cr_return_quantity (type: int), cr_returned_date_sk (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4320980099 Data size: 86073249960 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col2 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col2 (type: bigint) + Statistics: Num rows: 4320980099 Data size: 86073249960 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: may be used (ACID table) + Map 9 Map Operator Tree: TableScan alias: item @@ -227,249 +248,45 @@ STAGE PLANS: value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) - Map 5 - Map Operator Tree: - TableScan - alias: store_returns - Statistics: Num rows: 8332595709 Data size: 166044313360 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: sr_item_sk (type: bigint), sr_return_quantity (type: int), sr_returned_date_sk (type: bigint) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 8332595709 Data size: 166044313360 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col2 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col0, _col1, _col4 - input vertices: - 1 Reducer 10 - Statistics: Num rows: 8332595709 Data size: 566008907392 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col4 (type: date) - 1 _col0 (type: date) - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 8 - Statistics: Num rows: 8332595709 Data size: 99383547688 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 _col0 (type: bigint) - 1 _col0 (type: bigint) - outputColumnNames: _col1, _col6 - input vertices: - 1 Map 4 - Statistics: Num rows: 8332595709 Data size: 865982352916 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: sum(_col1) - keys: _col6 (type: string) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 837373692 Data size: 90436358736 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 837373692 Data size: 90436358736 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Map 8 - Map Operator Tree: - TableScan - alias: date_dim - filterExpr: ((d_week_seq is not null and d_date is not null) or ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) or d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: (d_week_seq is not null and d_date is not null) (type: boolean) - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date (type: date), d_week_seq (type: int) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4382940 Basic stats: COMPLETE Column stats: COMPLETE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 _col1 (type: int) - 1 _col0 (type: int) - outputColumnNames: _col0 - input vertices: - 1 Map 11 - Statistics: Num rows: 73049 Data size: 4090744 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: date) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: date) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: date) - Statistics: Num rows: 36524 Data size: 2045344 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: ((d_date) IN (DATE'1998-01-02', DATE'1998-10-15', DATE'1998-11-10') and d_week_seq is not null) (type: boolean) - Statistics: Num rows: 36525 Data size: 2191500 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_week_seq (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 36525 Data size: 146100 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: int) - minReductionHashAggr: 0.690705 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: int) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11297 Data size: 45188 Basic stats: COMPLETE Column stats: COMPLETE - Filter Operator - predicate: d_date is not null (type: boolean) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: d_date_sk (type: bigint), d_date (type: date) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: wr_returned_date_sk (bigint) - Target Input: web_returns - Partition key expr: wr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 12 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: cr_returned_date_sk (bigint) - Target Input: catalog_returns - Partition key expr: cr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Select Operator - expressions: _col0 (type: bigint) - outputColumnNames: _col0 - Statistics: Num rows: 73049 Data size: 584392 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - keys: _col0 (type: bigint) - minReductionHashAggr: 0.4 - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Dynamic Partitioning Event Operator - Target column: sr_returned_date_sk (bigint) - Target Input: store_returns - Partition key expr: sr_returned_date_sk - Statistics: Num rows: 67850 Data size: 542800 Basic stats: COMPLETE Column stats: COMPLETE - Target Vertex: Map 5 - Execution mode: vectorized, llap - LLAP IO: may be used (ACID table) - Reducer 10 - Execution mode: vectorized, llap - Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) - outputColumnNames: _col0, _col1 - Reduce Output Operator - key expressions: _col0 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: bigint) - Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) - Reducer 13 + Reducer 11 Execution mode: vectorized, llap Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col0 (type: string), _col1 (type: bigint), UDFToDouble(_col1) (type: double) - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint), _col2 (type: double) - Reducer 2 - Execution mode: vectorized, llap - Reduce Operator Tree: - Group By Operator - aggregations: sum(VALUE._col0) - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1 - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col0 (type: string) - null sort order: z - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: bigint) - Reducer 6 + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col3, _col4 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 322153773 Data size: 3258244456 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col4, _col7 + input vertices: + 1 Map 9 + Statistics: Num rows: 322153773 Data size: 32896391572 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col4) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 31930596 Data size: 3448504368 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 31930596 Data size: 3448504368 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 12 Execution mode: vectorized, llap Reduce Operator Tree: Group By Operator @@ -486,7 +303,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3 input vertices: - 0 Reducer 2 + 0 Reducer 8 Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -496,7 +313,7 @@ STAGE PLANS: 1 _col0 (type: string) outputColumnNames: _col0, _col1, _col3, _col5, _col6 input vertices: - 1 Reducer 13 + 1 Reducer 17 Statistics: Num rows: 247524 Data size: 32673168 Basic stats: COMPLETE Column stats: COMPLETE Top N Key Operator sort order: ++ @@ -514,7 +331,7 @@ STAGE PLANS: sort order: ++ Statistics: Num rows: 247524 Data size: 64356240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: bigint), _col4 (type: double), _col5 (type: bigint), _col6 (type: double), _col7 (type: decimal(25,6)) - Reducer 7 + Reducer 13 Execution mode: vectorized, llap Reduce Operator Tree: Select Operator @@ -531,19 +348,233 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 9 + Reducer 16 Execution mode: vectorized, llap Reduce Operator Tree: - Select Operator - expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: date) + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col3, _col4 + input vertices: + 0 Reducer 4 + Statistics: Num rows: 79751808 Data size: 762601204 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col4, _col7 + input vertices: + 1 Map 9 + Statistics: Num rows: 79751808 Data size: 8099767540 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col4) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 7920768 Data size: 855442944 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 7920768 Data size: 855442944 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 17 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial outputColumnNames: _col0, _col1 + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: string), _col1 (type: bigint), UDFToDouble(_col1) (type: double) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 247524 Data size: 28712784 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint), _col2 (type: double) + Reducer 2 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 1022672 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0 + input vertices: + 0 Map 3 + Statistics: Num rows: 1334020838 Data size: 10672166704 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1334020838 Data size: 10672166704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1334020838 Data size: 10672166704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2849700 Data size: 22797600 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: cr_returned_date_sk (bigint) + Target Input: catalog_returns + Partition key expr: cr_returned_date_sk + Statistics: Num rows: 2849700 Data size: 22797600 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 6 + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1334020838 Data size: 10672166704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1334020838 Data size: 10672166704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2849700 Data size: 22797600 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: sr_returned_date_sk (bigint) + Target Input: store_returns + Partition key expr: sr_returned_date_sk + Statistics: Num rows: 2849700 Data size: 22797600 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 10 + Reducer 4 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: date) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 18262 Data size: 1022672 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col1 (type: date) + 1 _col0 (type: date) + outputColumnNames: _col0 + input vertices: + 0 Reducer 5 + Statistics: Num rows: 1334020838 Data size: 10672166704 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1334020838 Data size: 10672166704 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1334020838 Data size: 10672166704 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + keys: _col0 (type: bigint) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 2849700 Data size: 22797600 Basic stats: COMPLETE Column stats: COMPLETE + Dynamic Partitioning Event Operator + Target column: wr_returned_date_sk (bigint) + Target Input: web_returns + Partition key expr: wr_returned_date_sk + Statistics: Num rows: 2849700 Data size: 22797600 Basic stats: COMPLETE Column stats: COMPLETE + Target Vertex: Map 15 + Reducer 5 + Execution mode: vectorized, llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: bigint) + outputColumnNames: _col1, _col0 Reduce Output Operator - key expressions: _col0 (type: bigint) + key expressions: _col1 (type: date) null sort order: z sort order: + - Map-reduce partition columns: _col0 (type: bigint) + Map-reduce partition columns: _col1 (type: date) Statistics: Num rows: 73049 Data size: 4675136 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col1 (type: date) + value expressions: _col0 (type: bigint) + Reducer 7 + Execution mode: vectorized, llap + Reduce Operator Tree: + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col3, _col4 + input vertices: + 0 Reducer 2 + Statistics: Num rows: 167057192 Data size: 1658334284 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true + Map Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col3 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col4, _col7 + input vertices: + 1 Map 9 + Statistics: Num rows: 167057192 Data size: 17027595948 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col4) + keys: _col7 (type: string) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 16584108 Data size: 1791083664 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 16584108 Data size: 1791083664 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Reducer 8 + Execution mode: vectorized, llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 247524 Data size: 26732592 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) Stage: Stage-0 Fetch Operator diff --git a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out index 3a966e9f29c8..16a410fc7131 100644 --- a/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out +++ b/ql/src/test/results/clientpositive/perf/tpcds30tb/tez/query95.q.out @@ -67,28 +67,32 @@ STAGE PLANS: input vertices: 1 Map 10 Statistics: Num rows: 6463723 Data size: 895528872 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - key expressions: _col3 (type: bigint) - null sort order: z - sort order: + - Map-reduce partition columns: _col3 (type: bigint) - Statistics: Num rows: 6463723 Data size: 895528872 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) Select Operator - expressions: _col3 (type: bigint) - outputColumnNames: _col3 - Statistics: Num rows: 6463723 Data size: 51709784 Basic stats: COMPLETE Column stats: COMPLETE - Group By Operator - aggregations: min(_col3), max(_col3), bloom_filter(_col3, expectedEntries=1000000) - minReductionHashAggr: 0.99 - mode: hash - outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - Reduce Output Operator - null sort order: - sort order: + expressions: _col3 (type: bigint), _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6463723 Data size: 895528872 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: bigint) + null sort order: z + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 6463723 Data size: 895528872 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: decimal(7,2)), _col2 (type: decimal(7,2)) + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 6463723 Data size: 51709784 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: binary) Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map 10 @@ -342,24 +346,24 @@ STAGE PLANS: keys: 0 KEY.reducesinkkey0 (type: bigint) 1 KEY.reducesinkkey0 (type: bigint) - outputColumnNames: _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2 input vertices: 1 Reducer 13 Statistics: Num rows: 6463723 Data size: 895528872 Basic stats: COMPLETE Column stats: COMPLETE DynamicPartitionHashJoin: true Reduce Output Operator - key expressions: _col3 (type: bigint) + key expressions: _col0 (type: bigint) null sort order: z sort order: + - Map-reduce partition columns: _col3 (type: bigint) + Map-reduce partition columns: _col0 (type: bigint) Statistics: Num rows: 6463723 Data size: 895528872 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col4 (type: decimal(7,2)), _col5 (type: decimal(7,2)) + value expressions: _col1 (type: decimal(7,2)), _col2 (type: decimal(7,2)) Select Operator - expressions: _col3 (type: bigint) - outputColumnNames: _col3 + expressions: _col0 (type: bigint) + outputColumnNames: _col0 Statistics: Num rows: 6463723 Data size: 51709784 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: min(_col3), max(_col3), bloom_filter(_col3, expectedEntries=1000000) + aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=1000000) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 @@ -376,13 +380,13 @@ STAGE PLANS: condition map: Left Semi Join 0 to 1 keys: - 0 _col3 (type: bigint) + 0 _col0 (type: bigint) 1 _col0 (type: bigint) - outputColumnNames: _col3, _col4, _col5 + outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6463723 Data size: 895528872 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator - aggregations: sum(_col4), sum(_col5) - keys: _col3 (type: bigint) + aggregations: sum(_col1), sum(_col2) + keys: _col0 (type: bigint) minReductionHashAggr: 0.9166043 mode: hash outputColumnNames: _col0, _col2, _col3