|
5 | 5 |
|
6 | 6 | package org.opensearch.sql.expression.function.udf.binning; |
7 | 7 |
|
| 8 | +import java.time.Instant; |
| 9 | +import java.time.ZoneOffset; |
| 10 | +import java.time.ZonedDateTime; |
| 11 | +import java.time.format.DateTimeFormatter; |
8 | 12 | import java.util.List; |
9 | 13 | import org.apache.calcite.adapter.enumerable.NotNullImplementor; |
10 | 14 | import org.apache.calcite.adapter.enumerable.NullPolicy; |
@@ -76,35 +80,76 @@ public Expression implement( |
76 | 80 | RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) { |
77 | 81 | Expression fieldValue = translatedOperands.get(0); |
78 | 82 | Expression numBins = translatedOperands.get(1); |
79 | | - Expression dataRange = translatedOperands.get(2); |
| 83 | + Expression minValue = translatedOperands.get(2); |
80 | 84 | Expression maxValue = translatedOperands.get(3); |
81 | 85 |
|
| 86 | + // Pass the field type information to help detect timestamps |
| 87 | + RelDataType fieldType = call.getOperands().get(0).getType(); |
| 88 | + boolean isTimestampField = dateRelatedType(fieldType); |
| 89 | + Expression isTimestamp = Expressions.constant(isTimestampField); |
| 90 | + |
| 91 | + // For timestamp fields, keep as-is (don't convert to Number) |
| 92 | + // For numeric fields, convert to Number |
| 93 | + Expression fieldValueExpr = |
| 94 | + isTimestampField ? fieldValue : Expressions.convert_(fieldValue, Number.class); |
| 95 | + Expression minValueExpr = |
| 96 | + isTimestampField ? minValue : Expressions.convert_(minValue, Number.class); |
| 97 | + Expression maxValueExpr = |
| 98 | + isTimestampField ? maxValue : Expressions.convert_(maxValue, Number.class); |
| 99 | + |
82 | 100 | return Expressions.call( |
83 | 101 | WidthBucketImplementor.class, |
84 | 102 | "calculateWidthBucket", |
85 | | - Expressions.convert_(fieldValue, Number.class), |
| 103 | + fieldValueExpr, |
86 | 104 | Expressions.convert_(numBins, Number.class), |
87 | | - Expressions.convert_(dataRange, Number.class), |
88 | | - Expressions.convert_(maxValue, Number.class)); |
| 105 | + minValueExpr, |
| 106 | + maxValueExpr, |
| 107 | + isTimestamp); |
89 | 108 | } |
90 | 109 |
|
91 | 110 | /** Width bucket calculation using nice number algorithm. */ |
92 | 111 | public static String calculateWidthBucket( |
93 | | - Number fieldValue, Number numBinsParam, Number dataRange, Number maxValue) { |
94 | | - if (fieldValue == null || numBinsParam == null || dataRange == null || maxValue == null) { |
| 112 | + Object fieldValue, |
| 113 | + Number numBinsParam, |
| 114 | + Object minValue, |
| 115 | + Object maxValue, |
| 116 | + boolean isTimestamp) { |
| 117 | + if (fieldValue == null || numBinsParam == null || minValue == null || maxValue == null) { |
95 | 118 | return null; |
96 | 119 | } |
97 | 120 |
|
98 | | - double value = fieldValue.doubleValue(); |
99 | 121 | int numBins = numBinsParam.intValue(); |
100 | | - |
101 | 122 | if (numBins < BinConstants.MIN_BINS || numBins > BinConstants.MAX_BINS) { |
102 | 123 | return null; |
103 | 124 | } |
104 | 125 |
|
105 | | - double range = dataRange.doubleValue(); |
106 | | - double max = maxValue.doubleValue(); |
| 126 | + // Handle timestamp fields differently |
| 127 | + if (isTimestamp) { |
| 128 | + // Convert all timestamp values to milliseconds |
| 129 | + long fieldMillis = convertTimestampToMillis(fieldValue); |
| 130 | + long minMillis = convertTimestampToMillis(minValue); |
| 131 | + long maxMillis = convertTimestampToMillis(maxValue); |
| 132 | + |
| 133 | + // Calculate range |
| 134 | + long rangeMillis = maxMillis - minMillis; |
| 135 | + if (rangeMillis <= 0) { |
| 136 | + return null; |
| 137 | + } |
| 138 | + |
| 139 | + return calculateTimestampBucket(fieldMillis, numBins, rangeMillis, minMillis); |
| 140 | + } |
| 141 | + |
| 142 | + // Numeric field handling (existing logic) |
| 143 | + Number numericValue = (Number) fieldValue; |
| 144 | + Number numericMin = (Number) minValue; |
| 145 | + Number numericMax = (Number) maxValue; |
| 146 | + |
| 147 | + double value = numericValue.doubleValue(); |
| 148 | + double min = numericMin.doubleValue(); |
| 149 | + double max = numericMax.doubleValue(); |
107 | 150 |
|
| 151 | + // Calculate range |
| 152 | + double range = max - min; |
108 | 153 | if (range <= 0) { |
109 | 154 | return null; |
110 | 155 | } |
@@ -152,6 +197,93 @@ private static double calculateOptimalWidth( |
152 | 197 | return optimalWidth; |
153 | 198 | } |
154 | 199 |
|
| 200 | + /** |
| 201 | + * Convert timestamp value to milliseconds. Handles both numeric (Long) milliseconds and String |
| 202 | + * formatted timestamps. |
| 203 | + */ |
| 204 | + private static long convertTimestampToMillis(Object timestamp) { |
| 205 | + if (timestamp instanceof Number) { |
| 206 | + return ((Number) timestamp).longValue(); |
| 207 | + } else if (timestamp instanceof String) { |
| 208 | + // Parse timestamp string "yyyy-MM-dd HH:mm:ss" to milliseconds |
| 209 | + // Use LocalDateTime to parse without timezone, then convert to UTC |
| 210 | + String timestampStr = (String) timestamp; |
| 211 | + java.time.LocalDateTime localDateTime = |
| 212 | + java.time.LocalDateTime.parse( |
| 213 | + timestampStr, DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); |
| 214 | + // Assume the timestamp is in UTC and convert to epoch millis |
| 215 | + return localDateTime.atZone(ZoneOffset.UTC).toInstant().toEpochMilli(); |
| 216 | + } else { |
| 217 | + throw new IllegalArgumentException("Unsupported timestamp type: " + timestamp.getClass()); |
| 218 | + } |
| 219 | + } |
| 220 | + |
| 221 | + /** |
| 222 | + * Calculate timestamp bucket using auto_date_histogram interval selection. Timestamps are in |
| 223 | + * milliseconds since epoch. Bins are aligned to the minimum timestamp, not to calendar |
| 224 | + * boundaries. |
| 225 | + */ |
| 226 | + private static String calculateTimestampBucket( |
| 227 | + long timestampMillis, int numBins, long rangeMillis, long minMillis) { |
| 228 | + // Calculate target width in milliseconds |
| 229 | + long targetWidthMillis = rangeMillis / numBins; |
| 230 | + |
| 231 | + // Select appropriate time interval (same as OpenSearch auto_date_histogram) |
| 232 | + long intervalMillis = selectTimeInterval(targetWidthMillis); |
| 233 | + |
| 234 | + // Floor timestamp to the interval boundary aligned with minMillis |
| 235 | + // This ensures bins start at the data's minimum value, like OpenSearch auto_date_histogram |
| 236 | + long offsetFromMin = timestampMillis - minMillis; |
| 237 | + long intervalsSinceMin = offsetFromMin / intervalMillis; |
| 238 | + long binStartMillis = minMillis + (intervalsSinceMin * intervalMillis); |
| 239 | + |
| 240 | + // Format as ISO 8601 timestamp string |
| 241 | + return formatTimestamp(binStartMillis); |
| 242 | + } |
| 243 | + |
| 244 | + /** |
| 245 | + * Select the appropriate time interval based on target width. Uses the same intervals as |
| 246 | + * OpenSearch auto_date_histogram: 1s, 5s, 10s, 30s, 1m, 5m, 10m, 30m, 1h, 3h, 12h, 1d, 7d, 1M, |
| 247 | + * 1y |
| 248 | + */ |
| 249 | + private static long selectTimeInterval(long targetWidthMillis) { |
| 250 | + // Define nice time intervals in milliseconds |
| 251 | + long[] intervals = { |
| 252 | + 1000L, // 1 second |
| 253 | + 5000L, // 5 seconds |
| 254 | + 10000L, // 10 seconds |
| 255 | + 30000L, // 30 seconds |
| 256 | + 60000L, // 1 minute |
| 257 | + 300000L, // 5 minutes |
| 258 | + 600000L, // 10 minutes |
| 259 | + 1800000L, // 30 minutes |
| 260 | + 3600000L, // 1 hour |
| 261 | + 10800000L, // 3 hours |
| 262 | + 43200000L, // 12 hours |
| 263 | + 86400000L, // 1 day |
| 264 | + 604800000L, // 7 days |
| 265 | + 2592000000L, // 30 days (approximate month) |
| 266 | + 31536000000L // 365 days (approximate year) |
| 267 | + }; |
| 268 | + |
| 269 | + // Find the smallest interval that is >= target width |
| 270 | + for (long interval : intervals) { |
| 271 | + if (interval >= targetWidthMillis) { |
| 272 | + return interval; |
| 273 | + } |
| 274 | + } |
| 275 | + |
| 276 | + // If target is larger than all intervals, use the largest |
| 277 | + return intervals[intervals.length - 1]; |
| 278 | + } |
| 279 | + |
| 280 | + /** Format timestamp in milliseconds as ISO 8601 string. Format: "yyyy-MM-dd HH:mm:ss" */ |
| 281 | + private static String formatTimestamp(long timestampMillis) { |
| 282 | + Instant instant = Instant.ofEpochMilli(timestampMillis); |
| 283 | + ZonedDateTime zdt = instant.atZone(ZoneOffset.UTC); |
| 284 | + return zdt.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")); |
| 285 | + } |
| 286 | + |
155 | 287 | /** Format range string with appropriate precision. */ |
156 | 288 | private static String formatRange(double binStart, double binEnd, double span) { |
157 | 289 | if (isIntegerSpan(span) && isIntegerValue(binStart) && isIntegerValue(binEnd)) { |
|
0 commit comments