Skip to content

Commit e8518df

Browse files
anton-kutuzov (Anton Kutuzov)
authored and committed
Add supporting of timestamp type in statistics for hive 3 and 4 versions
1 parent 4b67d9f commit e8518df

File tree

8 files changed

+258
-47
lines changed

8 files changed

+258
-47
lines changed

plugin/trino-hive/src/main/java/io/trino/plugin/hive/metastore/thrift/ThriftMetastoreUtil.java

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
import io.trino.hive.thrift.metastore.SerDeInfo;
4444
import io.trino.hive.thrift.metastore.StorageDescriptor;
4545
import io.trino.hive.thrift.metastore.StringColumnStatsData;
46+
import io.trino.hive.thrift.metastore.Timestamp;
47+
import io.trino.hive.thrift.metastore.TimestampColumnStatsData;
4648
import io.trino.metastore.AcidOperation;
4749
import io.trino.metastore.Column;
4850
import io.trino.metastore.Database;
@@ -83,6 +85,8 @@
8385
import java.math.BigInteger;
8486
import java.nio.ByteBuffer;
8587
import java.time.LocalDate;
88+
import java.time.LocalDateTime;
89+
import java.time.ZoneOffset;
8690
import java.util.ArrayDeque;
8791
import java.util.Arrays;
8892
import java.util.Collection;
@@ -148,8 +152,13 @@
148152
import static io.trino.spi.type.IntegerType.INTEGER;
149153
import static io.trino.spi.type.RealType.REAL;
150154
import static io.trino.spi.type.SmallintType.SMALLINT;
155+
import static io.trino.spi.type.StandardTypes.TIMESTAMP;
156+
import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_MILLISECOND;
157+
import static io.trino.spi.type.Timestamps.MICROSECONDS_PER_SECOND;
151158
import static io.trino.spi.type.TinyintType.TINYINT;
152159
import static io.trino.spi.type.VarbinaryType.VARBINARY;
160+
import static java.lang.Math.ceilDiv;
161+
import static java.lang.Math.floorDiv;
153162
import static java.lang.Math.toIntExact;
154163
import static java.lang.String.format;
155164
import static java.util.Locale.ENGLISH;
@@ -521,6 +530,10 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis
521530
LongColumnStatsData longStatsData = columnStatistics.getStatsData().getLongStats();
522531
OptionalLong min = longStatsData.isSetLowValue() ? OptionalLong.of(longStatsData.getLowValue()) : OptionalLong.empty();
523532
OptionalLong max = longStatsData.isSetHighValue() ? OptionalLong.of(longStatsData.getHighValue()) : OptionalLong.empty();
533+
if (min.isPresent() && max.isPresent() && columnStatistics.getColType().equals(TIMESTAMP)) {
534+
min = OptionalLong.of(min.getAsLong() * MICROSECONDS_PER_SECOND);
535+
max = OptionalLong.of(max.getAsLong() * MICROSECONDS_PER_SECOND);
536+
}
524537
OptionalLong nullsCount = longStatsData.isSetNumNulls() ? fromMetastoreNullsCount(longStatsData.getNumNulls()) : OptionalLong.empty();
525538
OptionalLong distinctValuesWithNullCount = longStatsData.isSetNumDVs() ? OptionalLong.of(longStatsData.getNumDVs()) : OptionalLong.empty();
526539
return createIntegerColumnStatistics(min, max, nullsCount, distinctValuesWithNullCount);
@@ -585,6 +598,14 @@ public static HiveColumnStatistics fromMetastoreApiColumnStatistics(ColumnStatis
585598
averageColumnLength,
586599
nullsCount);
587600
}
601+
if (columnStatistics.getStatsData().isSetTimestampStats()) {
602+
TimestampColumnStatsData timestampStatsData = columnStatistics.getStatsData().getTimestampStats();
603+
OptionalLong min = timestampStatsData.isSetLowValue() ? fromMetastoreTimestamp(timestampStatsData.getLowValue()) : OptionalLong.empty();
604+
OptionalLong max = timestampStatsData.isSetHighValue() ? fromMetastoreTimestamp(timestampStatsData.getHighValue()) : OptionalLong.empty();
605+
OptionalLong nullsCount = timestampStatsData.isSetNumNulls() ? fromMetastoreNullsCount(timestampStatsData.getNumNulls()) : OptionalLong.empty();
606+
OptionalLong distinctValuesWithNullCount = timestampStatsData.isSetNumDVs() ? OptionalLong.of(timestampStatsData.getNumDVs()) : OptionalLong.empty();
607+
return createIntegerColumnStatistics(min, max, nullsCount, distinctValuesWithNullCount);
608+
}
588609
throw new TrinoException(HIVE_INVALID_METADATA, "Invalid column statistics data: " + columnStatistics);
589610
}
590611

@@ -609,6 +630,14 @@ public static OptionalLong fromMetastoreNullsCount(long nullsCount)
609630
return OptionalLong.of(nullsCount);
610631
}
611632

633+
private static OptionalLong fromMetastoreTimestamp(Timestamp timestamp)
634+
{
635+
if (timestamp == null) {
636+
return OptionalLong.empty();
637+
}
638+
return OptionalLong.of(LocalDateTime.ofEpochSecond(timestamp.getSecondsSinceEpoch(), 0, ZoneOffset.UTC).toInstant(ZoneOffset.UTC).toEpochMilli() * MICROSECONDS_PER_MILLISECOND);
639+
}
640+
612641
private static Optional<BigDecimal> fromMetastoreDecimal(@Nullable Decimal decimal)
613642
{
614643
if (decimal == null) {
@@ -768,8 +797,9 @@ public static ColumnStatisticsObj createMetastoreColumnStatistics(String columnN
768797
case SHORT:
769798
case INT:
770799
case LONG:
771-
case TIMESTAMP:
772800
return createLongStatistics(columnName, columnType, statistics);
801+
case TIMESTAMP:
802+
return createTimestampStatistics(columnName, columnType, statistics);
773803
case FLOAT:
774804
case DOUBLE:
775805
return createDoubleStatistics(columnName, columnType, statistics);
@@ -819,6 +849,18 @@ private static ColumnStatisticsObj createLongStatistics(String columnName, HiveT
819849
return new ColumnStatisticsObj(columnName, columnType.toString(), longStats(data));
820850
}
821851

852+
private static ColumnStatisticsObj createTimestampStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
853+
{
854+
LongColumnStatsData data = new LongColumnStatsData();
855+
statistics.getIntegerStatistics().ifPresent(timestampStatistics -> {
856+
timestampStatistics.getMin().ifPresent(value -> data.setLowValue(floorDiv(value, MICROSECONDS_PER_SECOND)));
857+
timestampStatistics.getMax().ifPresent(value -> data.setHighValue(ceilDiv(value, MICROSECONDS_PER_SECOND)));
858+
});
859+
statistics.getNullsCount().ifPresent(data::setNumNulls);
860+
statistics.getDistinctValuesWithNullCount().ifPresent(data::setNumDVs);
861+
return new ColumnStatisticsObj(columnName, columnType.toString(), longStats(data));
862+
}
863+
822864
private static ColumnStatisticsObj createDoubleStatistics(String columnName, HiveType columnType, HiveColumnStatistics statistics)
823865
{
824866
DoubleColumnStatsData data = new DoubleColumnStatsData();
@@ -893,8 +935,7 @@ public static Set<HiveColumnStatisticType> getSupportedColumnStatistics(Type typ
893935
return ImmutableSet.of(MIN_VALUE, MAX_VALUE, NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES);
894936
}
895937
if (type instanceof TimestampType || type instanceof TimestampWithTimeZoneType) {
896-
// TODO (https://github.com/trinodb/trino/issues/5859) Add support for timestamp MIN_VALUE, MAX_VALUE
897-
return ImmutableSet.of(NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES);
938+
return ImmutableSet.of(MIN_VALUE, MAX_VALUE, NUMBER_OF_DISTINCT_VALUES, NUMBER_OF_NON_NULL_VALUES);
898939
}
899940
if (type instanceof VarcharType || type instanceof CharType) {
900941
// TODO Collect MIN,MAX once it is used by the optimizer

plugin/trino-hive/src/main/java/io/trino/plugin/hive/statistics/AbstractHiveStatisticsProvider.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
import io.trino.spi.statistics.TableStatistics;
4545
import io.trino.spi.type.CharType;
4646
import io.trino.spi.type.DecimalType;
47+
import io.trino.spi.type.TimestampType;
4748
import io.trino.spi.type.Type;
4849
import io.trino.spi.type.VarcharType;
4950

@@ -843,6 +844,9 @@ private static Optional<DoubleRange> createRange(Type type, HiveColumnStatistics
843844
if (type.equals(DATE)) {
844845
return statistics.getDateStatistics().flatMap(AbstractHiveStatisticsProvider::createDateRange);
845846
}
847+
if (type instanceof TimestampType) {
848+
return statistics.getIntegerStatistics().flatMap(AbstractHiveStatisticsProvider::createTimestampRange);
849+
}
846850
if (type instanceof DecimalType) {
847851
return statistics.getDecimalStatistics().flatMap(AbstractHiveStatisticsProvider::createDecimalRange);
848852
}
@@ -895,6 +899,14 @@ private static Optional<DoubleRange> createDateRange(DateStatistics statistics)
895899
return Optional.empty();
896900
}
897901

902+
private static Optional<DoubleRange> createTimestampRange(IntegerStatistics statistics)
903+
{
904+
if (statistics.getMin().isPresent() && statistics.getMax().isPresent()) {
905+
return Optional.of(new DoubleRange(statistics.getMin().getAsLong(), statistics.getMax().getAsLong()));
906+
}
907+
return Optional.empty();
908+
}
909+
898910
private static Optional<DoubleRange> createDecimalRange(DecimalStatistics statistics)
899911
{
900912
if (statistics.getMin().isPresent() && statistics.getMax().isPresent()) {

plugin/trino-hive/src/main/java/io/trino/plugin/hive/util/Statistics.java

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,13 @@
2626
import io.trino.spi.Page;
2727
import io.trino.spi.TrinoException;
2828
import io.trino.spi.block.Block;
29+
import io.trino.spi.block.Fixed12Block;
2930
import io.trino.spi.statistics.ColumnStatisticMetadata;
3031
import io.trino.spi.statistics.ComputedStatistics;
3132
import io.trino.spi.type.DecimalType;
3233
import io.trino.spi.type.Decimals;
34+
import io.trino.spi.type.LongTimestamp;
35+
import io.trino.spi.type.TimestampType;
3336
import io.trino.spi.type.Type;
3437

3538
import java.math.BigDecimal;
@@ -65,6 +68,7 @@
6568
import static io.trino.spi.type.IntegerType.INTEGER;
6669
import static io.trino.spi.type.RealType.REAL;
6770
import static io.trino.spi.type.SmallintType.SMALLINT;
71+
import static io.trino.spi.type.TimestampType.MAX_PRECISION;
6872
import static io.trino.spi.type.TinyintType.TINYINT;
6973
import static java.lang.Float.intBitsToFloat;
7074
import static java.lang.Math.toIntExact;
@@ -128,10 +132,12 @@ else if (type.equals(DOUBLE) || type.equals(REAL)) {
128132
else if (type.equals(DATE)) {
129133
result.setDateStatistics(new DateStatistics(Optional.empty(), Optional.empty()));
130134
}
135+
else if (type instanceof TimestampType) {
136+
result.setIntegerStatistics(new IntegerStatistics(OptionalLong.empty(), OptionalLong.empty()));
137+
}
131138
else if (type instanceof DecimalType) {
132139
result.setDecimalStatistics(new DecimalStatistics(Optional.empty(), Optional.empty()));
133140
}
134-
// TODO (https://github.com/trinodb/trino/issues/5859) Add support for timestamp
135141
else {
136142
throw new IllegalArgumentException("Unexpected type: " + type);
137143
}
@@ -240,10 +246,12 @@ else if (type.equals(DOUBLE) || type.equals(REAL)) {
240246
else if (type.equals(DATE)) {
241247
result.setDateStatistics(new DateStatistics(getDateValue(type, min), getDateValue(type, max)));
242248
}
249+
else if (type instanceof TimestampType) {
250+
result.setIntegerStatistics(new IntegerStatistics(getTimestampValue(type, min), getTimestampValue(type, max)));
251+
}
243252
else if (type instanceof DecimalType) {
244253
result.setDecimalStatistics(new DecimalStatistics(getDecimalValue(type, min), getDecimalValue(type, max)));
245254
}
246-
// TODO (https://github.com/trinodb/trino/issues/5859) Add support for timestamp
247255
else {
248256
throw new IllegalArgumentException("Unexpected type: " + type);
249257
}
@@ -288,6 +296,19 @@ private static Optional<LocalDate> getDateValue(Type type, Block block)
288296
return Optional.of(LocalDate.ofEpochDay(days));
289297
}
290298

299+
private static OptionalLong getTimestampValue(Type type, Block block)
300+
{
301+
verify(type instanceof TimestampType, "Unsupported type: %s", type);
302+
if (block.isNull(0)) {
303+
return OptionalLong.empty();
304+
}
305+
if (block instanceof Fixed12Block) {
306+
LongTimestamp ts = (LongTimestamp) TimestampType.createTimestampType(MAX_PRECISION).getObject(block, 0);
307+
return OptionalLong.of(ts.getEpochMicros());
308+
}
309+
return OptionalLong.of(type.getLong(block, 0));
310+
}
311+
291312
private static Optional<BigDecimal> getDecimalValue(Type type, Block block)
292313
{
293314
verify(type instanceof DecimalType, "Unsupported type: %s", type);

0 commit comments

Comments
 (0)