Skip to content

Commit f161a21

Browse files
committed
implement TemporalParser UTF16 support
1 parent d944a7d commit f161a21

File tree

8 files changed

+1111
-212
lines changed

8 files changed

+1111
-212
lines changed

src/builtins/core/date.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use crate::{
1010
ArithmeticOverflow, DifferenceOperation, DifferenceSettings, Disambiguation,
1111
DisplayCalendar, ResolvedRoundingOptions, Unit, UnitGroup,
1212
},
13-
parsers::{parse_date_time, IxdtfStringBuilder},
13+
parsers::{parse_date_time, IxdtfStringBuilder, TemporalParser},
1414
provider::{NeverProvider, TimeZoneProvider},
1515
MonthCode, TemporalError, TemporalResult, TemporalUnwrap, TimeZone,
1616
};
@@ -490,6 +490,25 @@ impl PlainDate {
490490
Self::try_new(date.year, date.month, date.day, calendar)
491491
}
492492

493+
/// Converts a UTF-16 encoded string into a `PlainDate`.
494+
pub fn from_utf16(s: &[u16]) -> TemporalResult<Self> {
495+
let parser = TemporalParser::from_utf16(s);
496+
let parsed = parser.parse_date_time()?;
497+
498+
let calendar = if let Some(cal_bytes) = parsed.calendar {
499+
Calendar::try_from_utf8(&cal_bytes)?
500+
} else {
501+
Calendar::default()
502+
};
503+
504+
Self::try_new(
505+
parsed.iso.date.year,
506+
parsed.iso.date.month,
507+
parsed.iso.date.day,
508+
calendar,
509+
)
510+
}
511+
493512
/// Creates a date time with values from a `PartialDate`.
494513
pub fn with(
495514
&self,

src/builtins/core/datetime.rs

Lines changed: 150 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -549,13 +549,25 @@ impl PlainDateTime {
549549

550550
// Converts a UTF-8 encoded string into a `PlainDateTime`.
551551
pub fn from_utf8(s: &[u8]) -> TemporalResult<Self> {
552-
let parser = TemporalParser::new();
553-
let parsed = parser.parse_date_time(core::str::from_utf8(s).map_err(|_| {
554-
TemporalError::syntax().with_message("Invalid UTF-8 in datetime string")
555-
})?)?;
552+
let parser = TemporalParser::from_utf8(s);
553+
let parsed = parser.parse_date_time()?;
556554

557-
let calendar = if let Some(cal_str) = &parsed.calendar {
558-
Calendar::try_from_utf8(cal_str.as_bytes())?
555+
let calendar = if let Some(cal_bytes) = parsed.calendar {
556+
Calendar::try_from_utf8(&cal_bytes)?
557+
} else {
558+
Calendar::default()
559+
};
560+
561+
Ok(Self::new_unchecked(parsed.iso, calendar))
562+
}
563+
564+
/// Converts a UTF-16 encoded string into a `PlainDateTime`.
565+
pub fn from_utf16(s: &[u16]) -> TemporalResult<Self> {
566+
let parser = TemporalParser::from_utf16(s);
567+
let parsed = parser.parse_date_time()?;
568+
569+
let calendar = if let Some(cal_bytes) = parsed.calendar {
570+
Calendar::try_from_utf8(&cal_bytes)?
559571
} else {
560572
Calendar::default()
561573
};
@@ -1518,4 +1530,136 @@ mod tests {
15181530
"pads 4 decimal places to 9"
15191531
);
15201532
}
1533+
1534+
#[test]
1535+
fn test_utf16_datetime_parsing() {
1536+
use alloc::vec::Vec;
1537+
1538+
let datetime_str = "2023-05-15T14:30:45.123";
1539+
let datetime_utf16: Vec<u16> = datetime_str.encode_utf16().collect();
1540+
1541+
// Test UTF-16 parsing
1542+
let datetime_utf16_result = PlainDateTime::from_utf16(&datetime_utf16).unwrap();
1543+
1544+
// Test UTF-8 parsing for comparison
1545+
let datetime_utf8_result = PlainDateTime::from_utf8(datetime_str.as_bytes()).unwrap();
1546+
1547+
// Compare results
1548+
assert_eq!(datetime_utf16_result.year(), datetime_utf8_result.year());
1549+
assert_eq!(datetime_utf16_result.month(), datetime_utf8_result.month());
1550+
assert_eq!(datetime_utf16_result.day(), datetime_utf8_result.day());
1551+
assert_eq!(datetime_utf16_result.hour(), datetime_utf8_result.hour());
1552+
assert_eq!(
1553+
datetime_utf16_result.minute(),
1554+
datetime_utf8_result.minute()
1555+
);
1556+
assert_eq!(
1557+
datetime_utf16_result.second(),
1558+
datetime_utf8_result.second()
1559+
);
1560+
assert_eq!(
1561+
datetime_utf16_result.millisecond(),
1562+
datetime_utf8_result.millisecond()
1563+
);
1564+
1565+
// Test specific values
1566+
assert_eq!(datetime_utf16_result.year(), 2023);
1567+
assert_eq!(datetime_utf16_result.month(), 5);
1568+
assert_eq!(datetime_utf16_result.day(), 15);
1569+
assert_eq!(datetime_utf16_result.hour(), 14);
1570+
assert_eq!(datetime_utf16_result.minute(), 30);
1571+
assert_eq!(datetime_utf16_result.second(), 45);
1572+
assert_eq!(datetime_utf16_result.millisecond(), 123);
1573+
}
1574+
1575+
#[test]
1576+
fn test_temporal_parser_from_str_as_utf8() {
1577+
use crate::parsers::TemporalParser;
1578+
1579+
let datetime_str = "2023-05-15T14:30:45.123";
1580+
let parser = TemporalParser::from_str_as_utf8(datetime_str);
1581+
1582+
// Test that the parser works correctly with the renamed method
1583+
let parsed = parser.parse_date_time().unwrap();
1584+
1585+
assert_eq!(parsed.iso.date.year, 2023);
1586+
assert_eq!(parsed.iso.date.month, 5);
1587+
assert_eq!(parsed.iso.date.day, 15);
1588+
assert_eq!(parsed.iso.time.hour, 14);
1589+
assert_eq!(parsed.iso.time.minute, 30);
1590+
assert_eq!(parsed.iso.time.second, 45);
1591+
assert_eq!(parsed.iso.time.millisecond, 123);
1592+
}
1593+
1594+
#[test]
1595+
fn test_all_temporal_types_utf16_support() {
1596+
use crate::{Instant, PlainDate, PlainMonthDay, PlainTime, PlainYearMonth};
1597+
use alloc::vec::Vec;
1598+
1599+
// Test all temporal types have consistent UTF-16 support
1600+
let datetime_str = "2023-05-15T14:30:45.123";
1601+
let datetime_utf16: Vec<u16> = datetime_str.encode_utf16().collect();
1602+
1603+
let time_str = "14:30:45.123";
1604+
let time_utf16: Vec<u16> = time_str.encode_utf16().collect();
1605+
1606+
let date_str = "2023-05-15T00:00:00";
1607+
let date_utf16: Vec<u16> = date_str.encode_utf16().collect();
1608+
1609+
let year_month_str = "2023-05";
1610+
let year_month_utf16: Vec<u16> = year_month_str.encode_utf16().collect();
1611+
1612+
let month_day_str = "05-15";
1613+
let month_day_utf16: Vec<u16> = month_day_str.encode_utf16().collect();
1614+
1615+
let instant_str = "2023-05-15T14:30:45.123Z";
1616+
let instant_utf16: Vec<u16> = instant_str.encode_utf16().collect();
1617+
1618+
// Test that all types can parse UTF-16
1619+
let datetime = PlainDateTime::from_utf16(&datetime_utf16).unwrap();
1620+
assert_eq!(datetime.year(), 2023);
1621+
assert_eq!(datetime.month(), 5);
1622+
assert_eq!(datetime.day(), 15);
1623+
assert_eq!(datetime.hour(), 14);
1624+
assert_eq!(datetime.minute(), 30);
1625+
assert_eq!(datetime.second(), 45);
1626+
assert_eq!(datetime.millisecond(), 123);
1627+
1628+
let time = PlainTime::from_utf16(&time_utf16).unwrap();
1629+
assert_eq!(time.hour(), 14);
1630+
assert_eq!(time.minute(), 30);
1631+
assert_eq!(time.second(), 45);
1632+
assert_eq!(time.millisecond(), 123);
1633+
1634+
let date = PlainDate::from_utf16(&date_utf16).unwrap();
1635+
assert_eq!(date.year(), 2023);
1636+
assert_eq!(date.month(), 5);
1637+
assert_eq!(date.day(), 15);
1638+
1639+
let year_month = PlainYearMonth::from_utf16(&year_month_utf16).unwrap();
1640+
assert_eq!(year_month.year(), 2023);
1641+
assert_eq!(year_month.month(), 5);
1642+
1643+
let month_day = PlainMonthDay::from_utf16(&month_day_utf16).unwrap();
1644+
assert_eq!(month_day.iso_month(), 5);
1645+
assert_eq!(month_day.day(), 15);
1646+
1647+
let instant = Instant::from_utf16(&instant_utf16).unwrap();
1648+
assert_eq!(instant.epoch_milliseconds(), 1684161045123);
1649+
1650+
// Test UTF-16 vs UTF-8 equivalence
1651+
let datetime_utf8 = PlainDateTime::from_utf8(datetime_str.as_bytes()).unwrap();
1652+
let time_utf8 = PlainTime::from_utf8(time_str.as_bytes()).unwrap();
1653+
let date_utf8 = PlainDate::from_utf8(date_str.as_bytes()).unwrap();
1654+
let year_month_utf8 = PlainYearMonth::from_utf8(year_month_str.as_bytes()).unwrap();
1655+
let month_day_utf8 = PlainMonthDay::from_utf8(month_day_str.as_bytes()).unwrap();
1656+
let instant_utf8 = Instant::from_utf8(instant_str.as_bytes()).unwrap();
1657+
1658+
assert_eq!(datetime, datetime_utf8);
1659+
assert_eq!(time, time_utf8);
1660+
assert_eq!(date, date_utf8);
1661+
assert_eq!(year_month, year_month_utf8);
1662+
assert_eq!(month_day, month_day_utf8);
1663+
assert_eq!(instant, instant_utf8);
1664+
}
15211665
}

src/builtins/core/instant.rs

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -278,10 +278,52 @@ impl Instant {
278278

279279
// Converts a UTF-8 encoded string into an `Instant`.
280280
pub fn from_utf8(s: &[u8]) -> TemporalResult<Self> {
281-
let parser = TemporalParser::new();
282-
let parsed = parser.parse_instant(core::str::from_utf8(s).map_err(|_| {
283-
TemporalError::syntax().with_message("Invalid UTF-8 in instant string")
284-
})?)?;
281+
let parser = TemporalParser::from_utf8(s);
282+
let parsed = parser.parse_instant()?;
283+
284+
// Find the offset
285+
let ns_offset = match parsed.offset {
286+
UtcOffsetRecordOrZ::Offset(offset) => {
287+
let ns = offset
288+
.fraction()
289+
.and_then(|x| x.to_nanoseconds())
290+
.unwrap_or(0);
291+
(offset.hour() as i64 * NANOSECONDS_PER_HOUR
292+
+ i64::from(offset.minute()) * NANOSECONDS_PER_MINUTE
293+
+ i64::from(offset.second().unwrap_or(0)) * NANOSECONDS_PER_SECOND
294+
+ i64::from(ns))
295+
* offset.sign() as i64
296+
}
297+
UtcOffsetRecordOrZ::Z => 0,
298+
};
299+
300+
let time_nanoseconds = parsed.iso.time.millisecond as u32 * 1_000_000
301+
+ parsed.iso.time.microsecond as u32 * 1_000
302+
+ parsed.iso.time.nanosecond as u32;
303+
let (millisecond, rem) = time_nanoseconds.div_rem_euclid(&1_000_000);
304+
let (microsecond, nanosecond) = rem.div_rem_euclid(&1_000);
305+
306+
let balanced = IsoDateTime::balance(
307+
parsed.iso.date.year,
308+
parsed.iso.date.month.into(),
309+
parsed.iso.date.day.into(),
310+
parsed.iso.time.hour.into(),
311+
parsed.iso.time.minute.into(),
312+
parsed.iso.time.second.clamp(0, 59).into(),
313+
millisecond.into(),
314+
microsecond.into(),
315+
i128::from(nanosecond) - i128::from(ns_offset),
316+
);
317+
318+
let nanoseconds = balanced.as_nanoseconds()?;
319+
320+
Ok(Self(nanoseconds))
321+
}
322+
323+
/// Converts a UTF-16 encoded string into an `Instant`.
324+
pub fn from_utf16(s: &[u16]) -> TemporalResult<Self> {
325+
let parser = TemporalParser::from_utf16(s);
326+
let parsed = parser.parse_instant()?;
285327

286328
// Find the offset
287329
let ns_offset = match parsed.offset {

src/builtins/core/month_day.rs

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -189,13 +189,40 @@ impl PlainMonthDay {
189189

190190
// Converts a UTF-8 encoded string into a `PlainMonthDay`.
191191
pub fn from_utf8(s: &[u8]) -> TemporalResult<Self> {
192-
let parser = TemporalParser::new();
193-
let parsed = parser.parse_month_day(core::str::from_utf8(s).map_err(|_| {
194-
TemporalError::syntax().with_message("Invalid UTF-8 in month-day string")
195-
})?)?;
192+
let parser = TemporalParser::from_utf8(s);
193+
let parsed = parser.parse_month_day()?;
196194

197-
let calendar = if let Some(cal_str) = &parsed.calendar {
198-
Calendar::try_from_utf8(cal_str.as_bytes())?
195+
let calendar = if let Some(cal_bytes) = parsed.calendar {
196+
Calendar::try_from_utf8(&cal_bytes)?
197+
} else {
198+
Calendar::default()
199+
};
200+
201+
// ParseISODateTime
202+
// Step 4.a.ii.3
203+
// If goal is TemporalMonthDayString or TemporalYearMonthString, calendar is
204+
// not empty, and the ASCII-lowercase of calendar is not "iso8601", throw a
205+
// RangeError exception.
206+
if !calendar.is_iso() {
207+
return Err(TemporalError::range().with_message("non-ISO calendar not supported."));
208+
}
209+
210+
Self::new_with_overflow(
211+
parsed.iso.month,
212+
parsed.iso.day,
213+
calendar,
214+
ArithmeticOverflow::Reject,
215+
None,
216+
)
217+
}
218+
219+
/// Converts a UTF-16 encoded string into a `PlainMonthDay`.
220+
pub fn from_utf16(s: &[u16]) -> TemporalResult<Self> {
221+
let parser = TemporalParser::from_utf16(s);
222+
let parsed = parser.parse_month_day()?;
223+
224+
let calendar = if let Some(cal_bytes) = parsed.calendar {
225+
Calendar::try_from_utf8(&cal_bytes)?
199226
} else {
200227
Calendar::default()
201228
};

src/builtins/core/time.rs

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -418,11 +418,15 @@ impl PlainTime {
418418

419419
// Converts a UTF-8 encoded string into a `PlainTime`.
420420
pub fn from_utf8(s: &[u8]) -> TemporalResult<Self> {
421-
let parser = TemporalParser::new();
422-
let parsed =
423-
parser.parse_time(core::str::from_utf8(s).map_err(|_| {
424-
TemporalError::syntax().with_message("Invalid UTF-8 in time string")
425-
})?)?;
421+
let parser = TemporalParser::from_utf8(s);
422+
let parsed = parser.parse_time()?;
423+
Ok(Self::new_unchecked(parsed.iso))
424+
}
425+
426+
/// Converts a UTF-16 encoded string into a `PlainTime`.
427+
pub fn from_utf16(s: &[u16]) -> TemporalResult<Self> {
428+
let parser = TemporalParser::from_utf16(s);
429+
let parsed = parser.parse_time()?;
426430
Ok(Self::new_unchecked(parsed.iso))
427431
}
428432

src/builtins/core/year_month.rs

Lines changed: 45 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ use crate::{
1515
provider::NeverProvider,
1616
temporal_assert,
1717
utils::pad_iso_year,
18-
Calendar, MonthCode, TemporalError, TemporalResult, TimeZone,
18+
Calendar, MonthCode, TemporalError, TemporalResult, TemporalUnwrap, TimeZone,
1919
};
2020
use icu_calendar::AnyCalendarKind;
2121

@@ -525,13 +525,51 @@ impl PlainYearMonth {
525525

526526
// Converts a UTF-8 encoded string into a `PlainYearMonth`.
527527
pub fn from_utf8(s: &[u8]) -> TemporalResult<Self> {
528-
let parser = TemporalParser::new();
529-
let parsed = parser.parse_year_month(core::str::from_utf8(s).map_err(|_| {
530-
TemporalError::syntax().with_message("Invalid UTF-8 in year-month string")
531-
})?)?;
528+
let parser = TemporalParser::from_utf8(s);
529+
let parsed = parser.parse_year_month()?;
532530

533-
let calendar = if let Some(cal_str) = &parsed.calendar {
534-
Calendar::try_from_utf8(cal_str.as_bytes())?
531+
let calendar = if let Some(cal_bytes) = parsed.calendar {
532+
Calendar::try_from_utf8(&cal_bytes)?
533+
} else {
534+
Calendar::default()
535+
};
536+
537+
// ParseISODateTime
538+
// Step 4.a.ii.3
539+
// If goal is TemporalMonthDayString or TemporalYearMonthString, calendar is
540+
// not empty, and the ASCII-lowercase of calendar is not "iso8601", throw a
541+
// RangeError exception.
542+
if !calendar.is_iso() {
543+
return Err(TemporalError::range().with_message("non-ISO calendar not supported."));
544+
}
545+
546+
// The below steps are from `ToTemporalYearMonth`
547+
// 10. Let isoDate be CreateISODateRecord(result.[[Year]], result.[[Month]], result.[[Day]]).
548+
let iso = parsed.iso;
549+
550+
// 11. If ISOYearMonthWithinLimits(isoDate) is false, throw a RangeError exception.
551+
if !year_month_within_limits(iso.year, iso.month) {
552+
return Err(TemporalError::range().with_message("Exceeded valid range."));
553+
}
554+
555+
let intermediate = Self::new_unchecked(iso, calendar);
556+
// 12. Set result to ISODateToFields(calendar, isoDate, year-month).
557+
let partial = PartialYearMonth::try_from_year_month(&intermediate)?;
558+
// 13. NOTE: The following operation is called with constrain regardless of the
559+
// value of overflow, in order for the calendar to store a canonical value in the
560+
// [[Day]] field of the [[ISODate]] internal slot of the result.
561+
// 14. Set isoDate to ? CalendarYearMonthFromFields(calendar, result, constrain).
562+
// 15. Return ! CreateTemporalYearMonth(isoDate, calendar).
563+
PlainYearMonth::from_partial(partial, ArithmeticOverflow::Constrain)
564+
}
565+
566+
/// Converts a UTF-16 encoded string into a `PlainYearMonth`.
567+
pub fn from_utf16(s: &[u16]) -> TemporalResult<Self> {
568+
let parser = TemporalParser::from_utf16(s);
569+
let parsed = parser.parse_year_month()?;
570+
571+
let calendar = if let Some(cal_bytes) = parsed.calendar {
572+
Calendar::try_from_utf8(&cal_bytes)?
535573
} else {
536574
Calendar::default()
537575
};

0 commit comments

Comments
 (0)