@@ -100,15 +100,24 @@ clickbench_pushdown: ClickBench queries against partitioned (100 files) parqu
clickbench_extended: ClickBench "inspired" queries against a single parquet (DataFusion specific)

# H2O.ai Benchmarks (Group By, Join, Window)
- h2o_small: h2oai benchmark with small dataset (1e7 rows) for groupby, default file format is csv
- h2o_medium: h2oai benchmark with medium dataset (1e8 rows) for groupby, default file format is csv
- h2o_big: h2oai benchmark with large dataset (1e9 rows) for groupby, default file format is csv
- h2o_small_join: h2oai benchmark with small dataset (1e7 rows) for join, default file format is csv
- h2o_medium_join: h2oai benchmark with medium dataset (1e8 rows) for join, default file format is csv
- h2o_big_join: h2oai benchmark with large dataset (1e9 rows) for join, default file format is csv
- h2o_small_window: Extended h2oai benchmark with small dataset (1e7 rows) for window, default file format is csv
- h2o_medium_window: Extended h2oai benchmark with medium dataset (1e8 rows) for window, default file format is csv
- h2o_big_window: Extended h2oai benchmark with large dataset (1e9 rows) for window, default file format is csv
+ h2o_small: h2oai benchmark with small dataset (1e7 rows) for groupby, default file format is csv
+ h2o_medium: h2oai benchmark with medium dataset (1e8 rows) for groupby, default file format is csv
+ h2o_big: h2oai benchmark with large dataset (1e9 rows) for groupby, default file format is csv
+ h2o_small_join: h2oai benchmark with small dataset (1e7 rows) for join, default file format is csv
+ h2o_medium_join: h2oai benchmark with medium dataset (1e8 rows) for join, default file format is csv
+ h2o_big_join: h2oai benchmark with large dataset (1e9 rows) for join, default file format is csv
+ h2o_small_window: Extended h2oai benchmark with small dataset (1e7 rows) for window, default file format is csv
+ h2o_medium_window: Extended h2oai benchmark with medium dataset (1e8 rows) for window, default file format is csv
+ h2o_big_window: Extended h2oai benchmark with large dataset (1e9 rows) for window, default file format is csv
+ h2o_small_parquet: h2oai benchmark with small dataset (1e7 rows) for groupby, file format is parquet
+ h2o_medium_parquet: h2oai benchmark with medium dataset (1e8 rows) for groupby, file format is parquet
+ h2o_big_parquet: h2oai benchmark with large dataset (1e9 rows) for groupby, file format is parquet
+ h2o_small_join_parquet: h2oai benchmark with small dataset (1e7 rows) for join, file format is parquet
+ h2o_medium_join_parquet: h2oai benchmark with medium dataset (1e8 rows) for join, file format is parquet
+ h2o_big_join_parquet: h2oai benchmark with large dataset (1e9 rows) for join, file format is parquet
+ h2o_small_window_parquet: Extended h2oai benchmark with small dataset (1e7 rows) for window, file format is parquet
+ h2o_medium_window_parquet: Extended h2oai benchmark with medium dataset (1e8 rows) for window, file format is parquet
+ h2o_big_window_parquet: Extended h2oai benchmark with large dataset (1e9 rows) for window, file format is parquet

# Join Order Benchmark (IMDB)
imdb: Join Order Benchmark (JOB) using the IMDB dataset converted to parquet
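The new *_parquet entries above are selected the same way as the existing CSV ones. A minimal usage sketch, assuming the script's usual data/run subcommands (only the benchmark name h2o_small_parquet is taken from the help text above):

# Generate the 1e7-row h2o group-by dataset as parquet, then run the queries against it.
./bench.sh data h2o_small_parquet
./bench.sh run h2o_small_parquet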
@@ -245,6 +254,34 @@ main() {
h2o_big_window)
    data_h2o_join "BIG" "CSV"
    ;;
+ h2o_small_parquet)
+     data_h2o "SMALL" "PARQUET"
+     ;;
+ h2o_medium_parquet)
+     data_h2o "MEDIUM" "PARQUET"
+     ;;
+ h2o_big_parquet)
+     data_h2o "BIG" "PARQUET"
+     ;;
+ h2o_small_join_parquet)
+     data_h2o_join "SMALL" "PARQUET"
+     ;;
+ h2o_medium_join_parquet)
+     data_h2o_join "MEDIUM" "PARQUET"
+     ;;
+ h2o_big_join_parquet)
+     data_h2o_join "BIG" "PARQUET"
+     ;;
+ # h2o window benchmark uses the same data as the h2o join
+ h2o_small_window_parquet)
+     data_h2o_join "SMALL" "PARQUET"
+     ;;
+ h2o_medium_window_parquet)
+     data_h2o_join "MEDIUM" "PARQUET"
+     ;;
+ h2o_big_window_parquet)
+     data_h2o_join "BIG" "PARQUET"
+     ;;
external_aggr)
    # same data as for tpch
    data_tpch "1"
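The new data arms only forward a size and a file format to the existing helpers; the helper bodies are outside this hunk. Below is a hypothetical sketch of such a dispatcher in the same shell style, with only the name data_h2o and its SIZE/FORMAT arguments taken from the diff and everything inside it assumed:

# Hypothetical sketch: generate the h2o group-by dataset for the requested
# SIZE (SMALL | MEDIUM | BIG) and FORMAT (CSV | PARQUET); the real data_h2o
# helper in bench.sh is not shown in this diff.
data_h2o() {
    local size="${1:-SMALL}"
    local format="${2:-CSV}"
    local rows
    case "$size" in
        SMALL)  rows="1e7" ;;
        MEDIUM) rows="1e8" ;;
        BIG)    rows="1e9" ;;
        *) echo "unknown h2o size: $size" >&2; return 1 ;;
    esac
    echo "generating h2o groupby data: ${rows} rows, format=${format}"
    # ...the real helper would invoke the dataset generator here...
}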
@@ -381,6 +418,34 @@ main() {
h2o_big_window)
    run_h2o_window "BIG" "CSV" "window"
    ;;
+ h2o_small_parquet)
+     run_h2o "SMALL" "PARQUET"
+     ;;
+ h2o_medium_parquet)
+     run_h2o "MEDIUM" "PARQUET"
+     ;;
+ h2o_big_parquet)
+     run_h2o "BIG" "PARQUET"
+     ;;
+ h2o_small_join_parquet)
+     run_h2o_join "SMALL" "PARQUET"
+     ;;
+ h2o_medium_join_parquet)
+     run_h2o_join "MEDIUM" "PARQUET"
+     ;;
+ h2o_big_join_parquet)
+     run_h2o_join "BIG" "PARQUET"
+     ;;
+ # h2o window benchmark uses the same data as the h2o join
+ h2o_small_window_parquet)
+     run_h2o_window "SMALL" "PARQUET"
+     ;;
+ h2o_medium_window_parquet)
+     run_h2o_window "MEDIUM" "PARQUET"
+     ;;
+ h2o_big_window_parquet)
+     run_h2o_window "BIG" "PARQUET"
+     ;;
external_aggr)
    run_external_aggr
    ;;
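The run arms pass the same SIZE/FORMAT pair on to run_h2o, run_h2o_join, and run_h2o_window, whose bodies are also outside this diff. A hypothetical sketch of what a format-aware run helper could look like, with only the helper name and its arguments taken from the hunk above:

# Hypothetical sketch: the run-side helpers mirror the data-side ones and take
# the same SIZE/FORMAT pair; the real run_h2o is defined elsewhere in bench.sh.
run_h2o() {
    local size="${1:-SMALL}"
    local format="${2:-CSV}"
    # Lower-case the format to pick a file extension (csv or parquet).
    local ext
    ext=$(printf '%s' "$format" | tr '[:upper:]' '[:lower:]')
    echo "running h2o groupby queries for size=${size} against .${ext} data"
    # ...the real helper would point the benchmark binary at that dataset...
}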