Skip to content

Commit d1a7d06

Browse files
committed
address comments
1 parent b62e15b commit d1a7d06

File tree

4 files changed

+111
-103
lines changed

4 files changed

+111
-103
lines changed

parquet-variant-compute/src/type_conversion.rs

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
//! Module for transforming a typed arrow `Array` to `VariantArray`.
1919
2020
use arrow::datatypes::{self, ArrowPrimitiveType, ArrowTimestampType, Date32Type};
21+
use chrono::{DateTime, Utc};
2122
use parquet_variant::Variant;
2223

2324
/// Options for controlling the behavior of `cast_to_variant_with_options`.
@@ -41,10 +42,19 @@ pub(crate) trait PrimitiveFromVariant: ArrowPrimitiveType {
4142
/// Extension trait for Arrow timestamp types that can extract their native value from a Variant
4243
/// We can't use [`PrimitiveFromVariant`] directly because we might need to use methods that
4344
/// are only available on [`ArrowTimestampType`] (such as with_timezone_opt)
44-
pub(crate) trait TimestampFromVariant: ArrowTimestampType {
45+
pub(crate) trait TimestampFromVariant<const NTZ: bool>: ArrowTimestampType {
4546
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native>;
4647
}
4748

49+
/// Extension trait that [`ArrowTimestampType`] handle [`DateTime<Utc>`] like [`NaiveDateTime`]
50+
trait MakeValueTz: ArrowTimestampType {
51+
fn make_value_tz(timestamp: DateTime<Utc>) -> Option<i64> {
52+
Self::make_value(timestamp.naive_utc())
53+
}
54+
}
55+
56+
impl<T: ArrowTimestampType> MakeValueTz for T {}
57+
4858
/// Macro to generate PrimitiveFromVariant implementations for Arrow primitive types
4959
macro_rules! impl_primitive_from_variant {
5060
($arrow_type:ty, $variant_method:ident $(, $cast_fn:expr)?) => {
@@ -56,15 +66,13 @@ macro_rules! impl_primitive_from_variant {
5666
}
5767
}
5868
};
59-
($arrow_type:ty, $( $variant_type:pat => $variant_method:ident, $cast_fn:expr ),+ $(,)?) => {
60-
impl TimestampFromVariant for $arrow_type {
69+
}
70+
71+
macro_rules! impl_timestamp_from_variant {
72+
($timestamp_type:ty, $variant_method:ident, ntz=$ntz:ident, $cast_fn:expr $(,)?) => {
73+
impl TimestampFromVariant<{ $ntz }> for $timestamp_type {
6174
fn from_variant(variant: &Variant<'_, '_>) -> Option<Self::Native> {
62-
match variant {
63-
$(
64-
$variant_type => variant.$variant_method().map($cast_fn),
65-
)+
66-
_ => None
67-
}
75+
variant.$variant_method().and_then($cast_fn)
6876
}
6977
}
7078
};
@@ -86,13 +94,30 @@ impl_primitive_from_variant!(
8694
as_naive_date,
8795
Date32Type::from_naive_date
8896
);
89-
impl_primitive_from_variant!(
97+
impl_timestamp_from_variant!(
9098
datatypes::TimestampMicrosecondType,
91-
Variant::TimestampNtzMicros(_) | Variant::TimestampMicros(_) => as_timestamp_micros, |t| t);
92-
impl_primitive_from_variant!(
99+
as_timestamp_ntz_micros,
100+
ntz = true,
101+
Self::make_value,
102+
);
103+
impl_timestamp_from_variant!(
104+
datatypes::TimestampMicrosecondType,
105+
as_timestamp_micros,
106+
ntz = false,
107+
Self::make_value_tz
108+
);
109+
impl_timestamp_from_variant!(
93110
datatypes::TimestampNanosecondType,
94-
Variant::TimestampNtzMicros(_) | Variant::TimestampMicros(_) => as_timestamp_micros, |t| 1000 * t,
95-
Variant::TimestampNtzNanos(_) | Variant::TimestampNanos(_) => as_timestamp_nanos, |t| t);
111+
as_timestamp_ntz_nanos,
112+
ntz = true,
113+
Self::make_value
114+
);
115+
impl_timestamp_from_variant!(
116+
datatypes::TimestampNanosecondType,
117+
as_timestamp_nanos,
118+
ntz = false,
119+
Self::make_value_tz
120+
);
96121

97122
/// Convert the value at a specific index in the given array into a `Variant`.
98123
macro_rules! non_generic_conversion_single_value {

parquet-variant-compute/src/variant_get.rs

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -866,18 +866,6 @@ mod test {
866866
])
867867
);
868868

869-
// test converting micro to nano
870-
perfectly_shredded_to_arrow_primitive_test!(
871-
get_variant_perfectly_shredded_timestamp_micro_ntz_as_nano_ntz,
872-
DataType::Timestamp(TimeUnit::Nanosecond, None),
873-
perfectly_shredded_timestamp_micro_ntz_variant_array,
874-
arrow::array::TimestampNanosecondArray::from(vec![
875-
Some(-456000000),
876-
Some(1758602096000001000),
877-
Some(1758602096000002000)
878-
])
879-
);
880-
881869
perfectly_shredded_variant_array_fn!(perfectly_shredded_timestamp_micro_variant_array, || {
882870
arrow::array::TimestampMicrosecondArray::from(vec![
883871
Some(-456000),
@@ -899,19 +887,6 @@ mod test {
899887
.with_timezone("+00:00")
900888
);
901889

902-
// test converting micro to nano
903-
perfectly_shredded_to_arrow_primitive_test!(
904-
get_variant_perfectly_shredded_timestamp_micro_as_nano,
905-
DataType::Timestamp(TimeUnit::Nanosecond, Some(Arc::from("+00:00"))),
906-
perfectly_shredded_timestamp_micro_variant_array,
907-
arrow::array::TimestampNanosecondArray::from(vec![
908-
Some(-456000000),
909-
Some(1758602096000001000),
910-
Some(1758602096000002000)
911-
])
912-
.with_timezone("+00:00")
913-
);
914-
915890
perfectly_shredded_variant_array_fn!(
916891
perfectly_shredded_timestamp_nano_ntz_variant_array,
917892
|| {

parquet-variant-compute/src/variant_to_arrow.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,11 @@ pub(crate) enum PrimitiveVariantToArrowRowBuilder<'a> {
4646
Float32(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float32Type>),
4747
Float64(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Float64Type>),
4848
TimestampMicro(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>),
49+
TimestampMicroNtz(
50+
VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampMicrosecondType>,
51+
),
4952
TimestampNano(VariantToTimestampArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
53+
TimestampNanoNtz(VariantToTimestampNtzArrowRowBuilder<'a, datatypes::TimestampNanosecondType>),
5054
Date(VariantToPrimitiveArrowRowBuilder<'a, datatypes::Date32Type>),
5155
}
5256

@@ -79,7 +83,9 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
7983
Float32(b) => b.append_null(),
8084
Float64(b) => b.append_null(),
8185
TimestampMicro(b) => b.append_null(),
86+
TimestampMicroNtz(b) => b.append_null(),
8287
TimestampNano(b) => b.append_null(),
88+
TimestampNanoNtz(b) => b.append_null(),
8389
Date(b) => b.append_null(),
8490
}
8591
}
@@ -100,7 +106,9 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
100106
Float32(b) => b.append_value(value),
101107
Float64(b) => b.append_value(value),
102108
TimestampMicro(b) => b.append_value(value),
109+
TimestampMicroNtz(b) => b.append_value(value),
103110
TimestampNano(b) => b.append_value(value),
111+
TimestampNanoNtz(b) => b.append_value(value),
104112
Date(b) => b.append_value(value),
105113
}
106114
}
@@ -121,7 +129,9 @@ impl<'a> PrimitiveVariantToArrowRowBuilder<'a> {
121129
Float32(b) => b.finish(),
122130
Float64(b) => b.finish(),
123131
TimestampMicro(b) => b.finish(),
132+
TimestampMicroNtz(b) => b.finish(),
124133
TimestampNano(b) => b.finish(),
134+
TimestampNanoNtz(b) => b.finish(),
125135
Date(b) => b.finish(),
126136
}
127137
}
@@ -210,10 +220,15 @@ pub(crate) fn make_primitive_variant_to_arrow_row_builder<'a>(
210220
cast_options,
211221
capacity,
212222
)),
223+
DataType::Timestamp(TimeUnit::Microsecond, None) => TimestampMicroNtz(
224+
VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
225+
),
213226
DataType::Timestamp(TimeUnit::Microsecond, tz) => TimestampMicro(
214227
VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
215228
),
216-
229+
DataType::Timestamp(TimeUnit::Nanosecond, None) => TimestampNanoNtz(
230+
VariantToTimestampNtzArrowRowBuilder::new(cast_options, capacity),
231+
),
217232
DataType::Timestamp(TimeUnit::Nanosecond, tz) => TimestampNano(
218233
VariantToTimestampArrowRowBuilder::new(cast_options, capacity, tz.clone()),
219234
),
@@ -401,7 +416,14 @@ define_variant_to_primitive_builder!(
401416
);
402417

403418
define_variant_to_primitive_builder!(
404-
struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant>
419+
struct VariantToTimestampNtzArrowRowBuilder<'a, T:TimestampFromVariant<true>>
420+
|capacity| -> PrimitiveBuilder<T> { PrimitiveBuilder::<T>::with_capacity(capacity) },
421+
|value| T::from_variant(value),
422+
type_name: get_type_name::<T>()
423+
);
424+
425+
define_variant_to_primitive_builder!(
426+
struct VariantToTimestampArrowRowBuilder<'a, T:TimestampFromVariant<false>>
405427
|capacity, tz: Option<Arc<str>> | -> PrimitiveBuilder<T> {
406428
PrimitiveBuilder::<T>::with_capacity(capacity).with_timezone_opt(tz)
407429
},

parquet-variant/src/variant.rs

Lines changed: 48 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -533,8 +533,8 @@ impl<'m, 'v> Variant<'m, 'v> {
533533

534534
/// Converts this variant to a `DateTime<Utc>` if possible.
535535
///
536-
/// Returns `Some(DateTime<Utc>)` for timestamp variants,
537-
/// `None` for non-timestamp variants.
536+
/// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampMicros`] variants,
537+
/// `None` for other variants.
538538
///
539539
/// # Examples
540540
///
@@ -545,92 +545,81 @@ impl<'m, 'v> Variant<'m, 'v> {
545545
/// // you can extract a DateTime<Utc> from a UTC-adjusted variant
546546
/// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap().and_utc();
547547
/// let v1 = Variant::from(datetime);
548-
/// assert_eq!(v1.as_datetime_utc(), Some(datetime));
548+
/// assert_eq!(v1.as_timestamp_micros(), Some(datetime));
549+
///
550+
/// // but not for other variants.
549551
/// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap().and_utc();
550552
/// let v2 = Variant::from(datetime_nanos);
551-
/// assert_eq!(v2.as_datetime_utc(), Some(datetime_nanos));
552-
///
553-
/// // but not from other variants
554-
/// let v3 = Variant::from("hello!");
555-
/// assert_eq!(v3.as_datetime_utc(), None);
553+
/// assert_eq!(v2.as_timestamp_micros(), None);
556554
/// ```
557-
pub fn as_datetime_utc(&self) -> Option<DateTime<Utc>> {
555+
pub fn as_timestamp_micros(&self) -> Option<DateTime<Utc>> {
558556
match *self {
559-
Variant::TimestampMicros(d) | Variant::TimestampNanos(d) => Some(d),
557+
Variant::TimestampMicros(d) => Some(d),
560558
_ => None,
561559
}
562560
}
563561

564-
/// Converts this variant to a `i64` representing microseconds since the Unix epoch if possible.
565-
/// This is useful when convert the variant to arrow types.
562+
/// Converts this variant to a `NaiveDateTime` if possible.
566563
///
567-
/// Returns Some(i64) for [`Variant::TimestampMicros`] and [`Variant::TimestampNtzMicros`],
568-
/// None for the other variant types.
564+
/// Returns `Some(NaiveDateTime)` for [`Variant::TimestampNtzMicros`] variants,
565+
/// `None` for other variants.
566+
///
567+
/// # Examples
569568
///
570569
/// ```
571570
/// use parquet_variant::Variant;
572571
/// use chrono::NaiveDate;
573572
///
574-
/// // you can extract an i64 from Variant::TimestampMicros
575-
/// let datetime = NaiveDate::from_ymd_opt(2025, 10, 03).unwrap().and_hms_milli_opt(12, 34, 56, 789).unwrap().and_utc();
573+
/// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
574+
/// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
576575
/// let v1 = Variant::from(datetime);
577-
/// assert_eq!(v1.as_timestamp_micros(), Some(1759494896789000));
576+
/// assert_eq!(v1.as_timestamp_ntz_micros(), Some(datetime));
578577
///
579-
/// // or Variant::TimestampNtzMicros
580-
/// let datetime_ntz = NaiveDate::from_ymd_opt(2025, 10, 03).unwrap().and_hms_milli_opt(12, 34, 56, 789).unwrap();
581-
/// let v2 = Variant::from(datetime_ntz);
582-
/// assert_eq!(v1.as_timestamp_micros(), Some(1759494896789000));
583-
///
584-
/// // but not from other variants
585-
/// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 10, 03).unwrap().and_hms_nano_opt(12, 34, 56, 789123456).unwrap().and_utc();
586-
/// let v3 = Variant::from(datetime_nanos);
587-
/// assert_eq!(v3.as_timestamp_micros(), None);
578+
/// // but not for other variants.
579+
/// let datetime_nanos = NaiveDate::from_ymd_opt(2025, 8, 14).unwrap().and_hms_nano_opt(12, 33, 54, 123456789).unwrap();
580+
/// let v2 = Variant::from(datetime_nanos);
581+
/// assert_eq!(v2.as_timestamp_micros(), None);
588582
/// ```
589-
pub fn as_timestamp_micros(&self) -> Option<i64> {
583+
pub fn as_timestamp_ntz_micros(&self) -> Option<NaiveDateTime> {
590584
match *self {
591-
Variant::TimestampMicros(d) => Some(d.timestamp_micros()),
592-
Variant::TimestampNtzMicros(d) => Some(d.and_utc().timestamp_micros()),
585+
Variant::TimestampNtzMicros(d) => Some(d),
593586
_ => None,
594587
}
595588
}
596589

597-
/// Converts this variant to a `i64` representing nanoseconds since the Unix epoch if possible.
598-
/// This is useful when convert the variant to arrow types.
590+
/// Converts this variant to a `DateTime<Utc>` if possible.
591+
///
592+
/// Returns `Some(DateTime<Utc>)` for [`Variant::TimestampNanos`] variants,
593+
/// `None` for other variants.
599594
///
600-
/// Returns Some(i64) for [`Variant::TimestampNanos`] and [`Variant::TimestampNtzNanos`],
601-
/// None for the other variant types.
595+
/// # Examples
602596
///
603597
/// ```
604598
/// use parquet_variant::Variant;
605599
/// use chrono::NaiveDate;
606600
///
607-
/// // you can extract an i64 from Variant::TimestampNanos
608-
/// let datetime = NaiveDate::from_ymd_opt(2025, 10, 03).unwrap().and_hms_nano_opt(12, 34, 56, 789123456).unwrap().and_utc();
601+
/// // you can extract a DateTime<Utc> from a UTC-adjusted variant
602+
/// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_nano_opt(12, 34, 56, 789123456).unwrap().and_utc();
609603
/// let v1 = Variant::from(datetime);
610-
/// assert_eq!(v1.as_timestamp_nanos(), Some(1759494896789123456));
611-
///
612-
/// // or Variant::TimestampNtzNanos
613-
/// let datetime_ntz = NaiveDate::from_ymd_opt(2025, 10, 03).unwrap().and_hms_nano_opt(12, 34, 56, 789123456).unwrap();
614-
/// let v2 = Variant::from(datetime_ntz);
615-
/// assert_eq!(v1.as_timestamp_nanos(), Some(1759494896789123456));
604+
/// assert_eq!(v1.as_timestamp_nanos(), Some(datetime));
616605
///
617-
/// // but not from other variants
618-
/// let datetime_micros = NaiveDate::from_ymd_opt(2025, 10, 03).unwrap().and_hms_micro_opt(12, 34, 56, 789).unwrap().and_utc();
619-
/// let v3 = Variant::from(datetime_micros);
620-
/// assert_eq!(v3.as_timestamp_nanos(), None);
606+
/// // but not for other variants.
607+
/// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14).unwrap().and_hms_milli_opt(12, 33, 54, 123).unwrap().and_utc();
608+
/// // this will convert to `Variant::TimestampMicros`.
609+
/// let v2 = Variant::from(datetime_micros);
610+
/// assert_eq!(v2.as_timestamp_nanos(), None);
621611
/// ```
622-
pub fn as_timestamp_nanos(&self) -> Option<i64> {
612+
pub fn as_timestamp_nanos(&self) -> Option<DateTime<Utc>> {
623613
match *self {
624-
Variant::TimestampNanos(d) => d.timestamp_nanos_opt(),
625-
Variant::TimestampNtzNanos(d) => d.and_utc().timestamp_nanos_opt(),
614+
Variant::TimestampNanos(d) => Some(d),
626615
_ => None,
627616
}
628617
}
629618

630619
/// Converts this variant to a `NaiveDateTime` if possible.
631620
///
632-
/// Returns `Some(NaiveDateTime)` for timestamp variants,
633-
/// `None` for non-timestamp variants.
621+
/// Returns `Some(NaiveDateTime)` for [`Variant::TimestampNtzNanos`] variants,
622+
/// `None` for other variants.
634623
///
635624
/// # Examples
636625
///
@@ -639,22 +628,19 @@ impl<'m, 'v> Variant<'m, 'v> {
639628
/// use chrono::NaiveDate;
640629
///
641630
/// // you can extract a NaiveDateTime from a non-UTC-adjusted variant
642-
/// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_milli_opt(12, 34, 56, 780).unwrap();
631+
/// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_nano_opt(12, 34, 56, 789123456).unwrap();
643632
/// let v1 = Variant::from(datetime);
644-
/// assert_eq!(v1.as_naive_datetime(), Some(datetime));
645-
///
646-
/// // or a UTC-adjusted variant
647-
/// let datetime = NaiveDate::from_ymd_opt(2025, 4, 16).unwrap().and_hms_nano_opt(12, 34, 56, 123456789).unwrap();
648-
/// let v2 = Variant::from(datetime);
649-
/// assert_eq!(v2.as_naive_datetime(), Some(datetime));
633+
/// assert_eq!(v1.as_timestamp_ntz_nanos(), Some(datetime));
650634
///
651-
/// // but not from other variants
652-
/// let v3 = Variant::from("hello!");
653-
/// assert_eq!(v3.as_naive_datetime(), None);
635+
/// // but not for other variants.
636+
/// let datetime_micros = NaiveDate::from_ymd_opt(2025, 8, 14).unwrap().and_hms_milli_opt(12, 33, 54, 123).unwrap();
637+
/// // this will convert to `Variant::TimestampMicros`.
638+
/// let v2 = Variant::from(datetime_micros);
639+
/// assert_eq!(v2.as_timestamp_ntz_nanos(), None);
654640
/// ```
655-
pub fn as_naive_datetime(&self) -> Option<NaiveDateTime> {
641+
pub fn as_timestamp_ntz_nanos(&self) -> Option<NaiveDateTime> {
656642
match *self {
657-
Variant::TimestampNtzMicros(d) | Variant::TimestampNtzNanos(d) => Some(d),
643+
Variant::TimestampNtzNanos(d) => Some(d),
658644
_ => None,
659645
}
660646
}

0 commit comments

Comments
 (0)