Skip to content

Commit 6fdcd9f

Browse files
committed
fix bins
Signed-off-by: Kai Huang <[email protected]>
1 parent 279eb67 commit 6fdcd9f

File tree

5 files changed

+525
-16
lines changed

5 files changed

+525
-16
lines changed

core/src/main/java/org/opensearch/sql/calcite/utils/binning/handlers/CountBinHandler.java

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
package org.opensearch.sql.calcite.utils.binning.handlers;
77

88
import org.apache.calcite.rex.RexNode;
9-
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
109
import org.opensearch.sql.ast.expression.Literal;
1110
import org.opensearch.sql.ast.tree.Bin;
1211
import org.opensearch.sql.ast.tree.CountBin;
@@ -30,20 +29,21 @@ public RexNode createExpression(
3029
requestedBins = BinConstants.DEFAULT_BINS;
3130
}
3231

33-
// Calculate data range using window functions
34-
RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex();
32+
// Calculate MIN and MAX using window functions
3533
RexNode maxValue = context.relBuilder.max(fieldExpr).over().toRex();
36-
RexNode dataRange = context.relBuilder.call(SqlStdOperatorTable.MINUS, maxValue, minValue);
34+
RexNode minValue = context.relBuilder.min(fieldExpr).over().toRex();
3735

3836
// Convert start/end parameters
3937
RexNode startValue = convertParameter(countBin.getStart(), context);
4038
RexNode endValue = convertParameter(countBin.getEnd(), context);
4139

42-
// WIDTH_BUCKET(field_value, num_bins, data_range, max_value)
40+
// WIDTH_BUCKET(field_value, num_bins, min_value, max_value)
41+
// Note: We pass minValue instead of dataRange - WIDTH_BUCKET will calculate the range
42+
// internally
4343
RexNode numBins = context.relBuilder.literal(requestedBins);
4444

4545
return context.rexBuilder.makeCall(
46-
PPLBuiltinOperators.WIDTH_BUCKET, fieldExpr, numBins, dataRange, maxValue);
46+
PPLBuiltinOperators.WIDTH_BUCKET, fieldExpr, numBins, minValue, maxValue);
4747
}
4848

4949
private RexNode convertParameter(

core/src/main/java/org/opensearch/sql/expression/function/udf/binning/WidthBucketFunction.java

Lines changed: 142 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,10 @@
55

66
package org.opensearch.sql.expression.function.udf.binning;
77

8+
import java.time.Instant;
9+
import java.time.ZoneOffset;
10+
import java.time.ZonedDateTime;
11+
import java.time.format.DateTimeFormatter;
812
import java.util.List;
913
import org.apache.calcite.adapter.enumerable.NotNullImplementor;
1014
import org.apache.calcite.adapter.enumerable.NullPolicy;
@@ -76,35 +80,76 @@ public Expression implement(
7680
RexToLixTranslator translator, RexCall call, List<Expression> translatedOperands) {
7781
Expression fieldValue = translatedOperands.get(0);
7882
Expression numBins = translatedOperands.get(1);
79-
Expression dataRange = translatedOperands.get(2);
83+
Expression minValue = translatedOperands.get(2);
8084
Expression maxValue = translatedOperands.get(3);
8185

86+
// Pass the field type information to help detect timestamps
87+
RelDataType fieldType = call.getOperands().get(0).getType();
88+
boolean isTimestampField = dateRelatedType(fieldType);
89+
Expression isTimestamp = Expressions.constant(isTimestampField);
90+
91+
// For timestamp fields, keep as-is (don't convert to Number)
92+
// For numeric fields, convert to Number
93+
Expression fieldValueExpr =
94+
isTimestampField ? fieldValue : Expressions.convert_(fieldValue, Number.class);
95+
Expression minValueExpr =
96+
isTimestampField ? minValue : Expressions.convert_(minValue, Number.class);
97+
Expression maxValueExpr =
98+
isTimestampField ? maxValue : Expressions.convert_(maxValue, Number.class);
99+
82100
return Expressions.call(
83101
WidthBucketImplementor.class,
84102
"calculateWidthBucket",
85-
Expressions.convert_(fieldValue, Number.class),
103+
fieldValueExpr,
86104
Expressions.convert_(numBins, Number.class),
87-
Expressions.convert_(dataRange, Number.class),
88-
Expressions.convert_(maxValue, Number.class));
105+
minValueExpr,
106+
maxValueExpr,
107+
isTimestamp);
89108
}
90109

91110
/** Width bucket calculation using nice number algorithm. */
92111
public static String calculateWidthBucket(
93-
Number fieldValue, Number numBinsParam, Number dataRange, Number maxValue) {
94-
if (fieldValue == null || numBinsParam == null || dataRange == null || maxValue == null) {
112+
Object fieldValue,
113+
Number numBinsParam,
114+
Object minValue,
115+
Object maxValue,
116+
boolean isTimestamp) {
117+
if (fieldValue == null || numBinsParam == null || minValue == null || maxValue == null) {
95118
return null;
96119
}
97120

98-
double value = fieldValue.doubleValue();
99121
int numBins = numBinsParam.intValue();
100-
101122
if (numBins < BinConstants.MIN_BINS || numBins > BinConstants.MAX_BINS) {
102123
return null;
103124
}
104125

105-
double range = dataRange.doubleValue();
106-
double max = maxValue.doubleValue();
126+
// Handle timestamp fields differently
127+
if (isTimestamp) {
128+
// Convert all timestamp values to milliseconds
129+
long fieldMillis = convertTimestampToMillis(fieldValue);
130+
long minMillis = convertTimestampToMillis(minValue);
131+
long maxMillis = convertTimestampToMillis(maxValue);
132+
133+
// Calculate range
134+
long rangeMillis = maxMillis - minMillis;
135+
if (rangeMillis <= 0) {
136+
return null;
137+
}
138+
139+
return calculateTimestampBucket(fieldMillis, numBins, rangeMillis, minMillis);
140+
}
141+
142+
// Numeric field handling (existing logic)
143+
Number numericValue = (Number) fieldValue;
144+
Number numericMin = (Number) minValue;
145+
Number numericMax = (Number) maxValue;
146+
147+
double value = numericValue.doubleValue();
148+
double min = numericMin.doubleValue();
149+
double max = numericMax.doubleValue();
107150

151+
// Calculate range
152+
double range = max - min;
108153
if (range <= 0) {
109154
return null;
110155
}
@@ -152,6 +197,93 @@ private static double calculateOptimalWidth(
152197
return optimalWidth;
153198
}
154199

200+
/**
201+
* Convert timestamp value to milliseconds. Handles both numeric (Long) milliseconds and String
202+
* formatted timestamps.
203+
*/
204+
private static long convertTimestampToMillis(Object timestamp) {
205+
if (timestamp instanceof Number) {
206+
return ((Number) timestamp).longValue();
207+
} else if (timestamp instanceof String) {
208+
// Parse timestamp string "yyyy-MM-dd HH:mm:ss" to milliseconds
209+
// Use LocalDateTime to parse without timezone, then convert to UTC
210+
String timestampStr = (String) timestamp;
211+
java.time.LocalDateTime localDateTime =
212+
java.time.LocalDateTime.parse(
213+
timestampStr, DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
214+
// Assume the timestamp is in UTC and convert to epoch millis
215+
return localDateTime.atZone(ZoneOffset.UTC).toInstant().toEpochMilli();
216+
} else {
217+
throw new IllegalArgumentException("Unsupported timestamp type: " + timestamp.getClass());
218+
}
219+
}
220+
221+
/**
222+
* Calculate timestamp bucket using auto_date_histogram interval selection. Timestamps are in
223+
* milliseconds since epoch. Bins are aligned to the minimum timestamp, not to calendar
224+
* boundaries.
225+
*/
226+
private static String calculateTimestampBucket(
227+
long timestampMillis, int numBins, long rangeMillis, long minMillis) {
228+
// Calculate target width in milliseconds
229+
long targetWidthMillis = rangeMillis / numBins;
230+
231+
// Select appropriate time interval (same as OpenSearch auto_date_histogram)
232+
long intervalMillis = selectTimeInterval(targetWidthMillis);
233+
234+
// Floor timestamp to the interval boundary aligned with minMillis
235+
// This ensures bins start at the data's minimum value, like OpenSearch auto_date_histogram
236+
long offsetFromMin = timestampMillis - minMillis;
237+
long intervalsSinceMin = offsetFromMin / intervalMillis;
238+
long binStartMillis = minMillis + (intervalsSinceMin * intervalMillis);
239+
240+
// Format as ISO 8601 timestamp string
241+
return formatTimestamp(binStartMillis);
242+
}
243+
244+
/**
245+
* Select the appropriate time interval based on target width. Uses the same intervals as
246+
* OpenSearch auto_date_histogram: 1s, 5s, 10s, 30s, 1m, 5m, 10m, 30m, 1h, 3h, 12h, 1d, 7d, 1M,
247+
* 1y
248+
*/
249+
private static long selectTimeInterval(long targetWidthMillis) {
250+
// Define nice time intervals in milliseconds
251+
long[] intervals = {
252+
1000L, // 1 second
253+
5000L, // 5 seconds
254+
10000L, // 10 seconds
255+
30000L, // 30 seconds
256+
60000L, // 1 minute
257+
300000L, // 5 minutes
258+
600000L, // 10 minutes
259+
1800000L, // 30 minutes
260+
3600000L, // 1 hour
261+
10800000L, // 3 hours
262+
43200000L, // 12 hours
263+
86400000L, // 1 day
264+
604800000L, // 7 days
265+
2592000000L, // 30 days (approximate month)
266+
31536000000L // 365 days (approximate year)
267+
};
268+
269+
// Find the smallest interval that is >= target width
270+
for (long interval : intervals) {
271+
if (interval >= targetWidthMillis) {
272+
return interval;
273+
}
274+
}
275+
276+
// If target is larger than all intervals, use the largest
277+
return intervals[intervals.length - 1];
278+
}
279+
280+
/** Format timestamp in milliseconds as ISO 8601 string. Format: "yyyy-MM-dd HH:mm:ss" */
281+
private static String formatTimestamp(long timestampMillis) {
282+
Instant instant = Instant.ofEpochMilli(timestampMillis);
283+
ZonedDateTime zdt = instant.atZone(ZoneOffset.UTC);
284+
return zdt.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));
285+
}
286+
155287
/** Format range string with appropriate precision. */
156288
private static String formatRange(double binStart, double binEnd, double span) {
157289
if (isIntegerSpan(span) && isIntegerValue(binStart) && isIntegerValue(binEnd)) {

0 commit comments

Comments
 (0)