Skip to content

Commit b16af6c

Browse files
authored
HIVE-28520: Upgrade to datasketches 2.0.0 (#5444)
1 parent d6bcdf6 commit b16af6c

19 files changed

+1327
-1234
lines changed

pom.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@
220 220
<rs-api.version>2.0.1</rs-api.version>
221 221
<json-path.version>2.9.0</json-path.version>
222 222
<janino.version>3.1.12</janino.version>
223-
<datasketches.version>1.2.0</datasketches.version>
223+
<datasketches.version>2.0.0</datasketches.version>
224 224
<spotbugs.version>4.8.6</spotbugs.version>
225 225
<validation-api.version>1.1.0.Final</validation-api.version>
226 226
<aws-secretsmanager-caching.version>1.0.1</aws-secretsmanager-caching.version>

ql/src/java/org/apache/hadoop/hive/ql/ddl/ShowUtils.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
23 23
import org.apache.commons.lang3.StringUtils;
24 24
import org.apache.datasketches.kll.KllFloatsSketch;
25 25
import org.apache.datasketches.memory.Memory;
26+
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
26 27
import org.apache.hadoop.fs.FSDataOutputStream;
27 28
import org.apache.hadoop.fs.FileSystem;
28 29
import org.apache.hadoop.fs.Path;
@@ -257,7 +258,7 @@ private static String convertHistogram(byte[] buffer, ColumnStatisticsData._Fiel
257 258
final KllFloatsSketch kll = KllFloatsSketch.heapify(Memory.wrap(buffer));
258 259
// to keep the visualization compact, we print only the quartiles (Q1, Q2 and Q3),
259 260
// as min and max are displayed as separate statistics already
260-
final float[] quantiles = kll.getQuantiles(new double[]{ 0.25, 0.5, 0.75 });
261+
final float[] quantiles = kll.getQuantiles(new double[]{ 0.25, 0.5, 0.75 }, QuantileSearchCriteria.EXCLUSIVE);
261 262

262 263
Function<Float, Object> converter;
263 264

ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
42 42
import org.apache.calcite.util.ImmutableBitSet;
43 43
import org.apache.datasketches.kll.KllFloatsSketch;
44 44
import org.apache.datasketches.memory.Memory;
45+
import org.apache.datasketches.quantilescommon.QuantileSearchCriteria;
45 46
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
46 47
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveConfPlannerContext;
47 48
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -489,7 +490,7 @@ public Double visitLiteral(RexLiteral literal) {
489 490

490 491
private static double rangedSelectivity(KllFloatsSketch kll, float val1, float val2) {
491 492
float[] splitPoints = new float[] { val1, val2 };
492-
double[] boundaries = kll.getCDF(splitPoints);
493+
double[] boundaries = kll.getCDF(splitPoints, QuantileSearchCriteria.EXCLUSIVE);
493 494
return boundaries[1] - boundaries[0];
494 495
}
495 496

@@ -500,7 +501,7 @@ private static double rangedSelectivity(KllFloatsSketch kll, float val1, float v
500 501
* @return the selectivity of a predicate "column &gt; value" in the range [0, 1]
501 502
*/
502 503
public static double greaterThanSelectivity(KllFloatsSketch kll, float value) {
503-
float max = kll.getMaxValue();
504+
float max = kll.getMaxItem();
504 505
if (value > max) {
505 506
return 0;
506 507
}
@@ -518,10 +519,10 @@ public static double greaterThanSelectivity(KllFloatsSketch kll, float value) {
518 519
* @return the selectivity of a predicate "column &gt;= value" in the range [0, 1]
519 520
*/
520 521
public static double greaterThanOrEqualSelectivity(KllFloatsSketch kll, float value) {
521-
if (value > kll.getMaxValue()) {
522+
if (value > kll.getMaxItem()) {
522 523
return 0;
523 524
}
524-
return rangedSelectivity(kll, value, Math.nextUp(kll.getMaxValue()));
525+
return rangedSelectivity(kll, value, Math.nextUp(kll.getMaxItem()));
525 526
}
526 527

527 528
/**
@@ -531,10 +532,10 @@ public static double greaterThanOrEqualSelectivity(KllFloatsSketch kll, float va
531 532
* @return the selectivity of a predicate "column &lt;= value" in the range [0, 1]
532 533
*/
533 534
public static double lessThanOrEqualSelectivity(KllFloatsSketch kll, float value) {
534-
if (value < kll.getMinValue()) {
535+
if (value < kll.getMinItem()) {
535 536
return 0;
536 537
}
537-
return kll.getCDF(new float[] { Math.nextUp(value) })[0];
538+
return kll.getCDF(new float[] { Math.nextUp(value) }, QuantileSearchCriteria.EXCLUSIVE)[0];
538 539
}
539 540

540 541
/**
@@ -544,14 +545,14 @@ public static double lessThanOrEqualSelectivity(KllFloatsSketch kll, float value
544 545
* @return the selectivity of a predicate "column &lt; value" in the range [0, 1]
545 546
*/
546 547
public static double lessThanSelectivity(KllFloatsSketch kll, float value) {
547-
float min = kll.getMinValue();
548+
float min = kll.getMinItem();
548 549
if (value < min) {
549 550
return 0;
550 551
}
551 552
if (Double.compare(value, min) == 0 || Double.compare(Math.nextUp(value), min) == 0) {
552 553
return 0;
553 554
}
554-
return kll.getCDF(new float[] { value })[0];
555+
return kll.getCDF(new float[] { value }, QuantileSearchCriteria.EXCLUSIVE)[0];
555 556
}
556 557

557 558
/**

ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
37 37
import com.google.common.annotations.VisibleForTesting;
38 38
import com.google.common.base.Preconditions;
39 39
import org.apache.calcite.rel.metadata.RelMdUtil;
40-
import org.apache.datasketches.SketchesArgumentException;
40+
import org.apache.datasketches.common.SketchesArgumentException;
41 41
import org.apache.datasketches.kll.KllFloatsSketch;
42 42
import org.apache.datasketches.memory.Memory;
43 43
import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;

0 commit comments

Comments
 (0)