2323from xarray .compat .pdcompat import default_precision_timestamp , timestamp_as_unit
2424from xarray .core import indexing
2525from xarray .core .common import contains_cftime_datetimes , is_np_datetime_like
26- from xarray .core .duck_array_ops import array_all , array_any , asarray , ravel , reshape
26+ from xarray .core .duck_array_ops import array_all , asarray , ravel , reshape
2727from xarray .core .formatting import first_n_items , format_timestamp , last_item
2828from xarray .core .utils import attempt_import , emit_user_level_warning
2929from xarray .core .variable import Variable
@@ -298,6 +298,19 @@ def _unpack_time_unit_and_ref_date(
298298 return time_unit , ref_date
299299
300300
301+ def _unpack_time_units_and_ref_date_cftime (units : str , calendar : str ):
302+ # same as _unpack_netcdf_time_units but finalizes ref_date for
303+ # processing in encode_cf_datetime
304+ time_units , ref_date = _unpack_netcdf_time_units (units )
305+ ref_date = cftime .num2date (
306+ 0 ,
307+ units = f"microseconds since { ref_date } " ,
308+ calendar = calendar ,
309+ only_use_cftime_datetimes = True ,
310+ )
311+ return time_units , ref_date
312+
313+
301314def _decode_cf_datetime_dtype (
302315 data ,
303316 units : str ,
@@ -686,6 +699,7 @@ def _infer_time_units_from_diff(unique_timedeltas) -> str:
686699 # todo: check, if this function works correctly wrt np.timedelta64
687700 unit_timedelta : Callable [[str ], timedelta ] | Callable [[str ], np .timedelta64 ]
688701 zero_timedelta : timedelta | np .timedelta64
702+ unique_timedeltas = asarray (unique_timedeltas )
689703 if unique_timedeltas .dtype == np .dtype ("O" ):
690704 time_units = _NETCDF_TIME_UNITS_CFTIME
691705 unit_timedelta = _unit_timedelta_cftime
@@ -700,6 +714,10 @@ def _infer_time_units_from_diff(unique_timedeltas) -> str:
700714 return "seconds"
701715
702716
717+ def _time_units_to_timedelta (units : str ) -> timedelta :
718+ return timedelta (microseconds = _US_PER_TIME_DELTA [units ])
719+
720+
703721def infer_calendar_name (dates ) -> CFCalendar :
704722 """Given an array of datetimes, infer the CF calendar name"""
705723 if is_np_datetime_like (dates .dtype ):
@@ -974,42 +992,6 @@ def _division(deltas, delta, floor):
974992 return num
975993
976994
977- def _cast_to_dtype_if_safe (num : np .ndarray , dtype : np .dtype ) -> np .ndarray :
978- with warnings .catch_warnings ():
979- warnings .filterwarnings ("ignore" , message = "overflow" )
980- cast_num = np .asarray (num , dtype = dtype )
981-
982- if np .issubdtype (dtype , np .integer ):
983- if not array_all (num == cast_num ):
984- if np .issubdtype (num .dtype , np .floating ):
985- raise ValueError (
986- f"Not possible to cast all encoded times from "
987- f"{ num .dtype !r} to { dtype !r} without losing precision. "
988- f"Consider modifying the units such that integer values "
989- f"can be used, or removing the units and dtype encoding, "
990- f"at which point xarray will make an appropriate choice."
991- )
992- else :
993- raise OverflowError (
994- f"Not possible to cast encoded times from "
995- f"{ num .dtype !r} to { dtype !r} without overflow. Consider "
996- f"removing the dtype encoding, at which point xarray will "
997- f"make an appropriate choice, or explicitly switching to "
998- "a larger integer dtype."
999- )
1000- else :
1001- if array_any (np .isinf (cast_num )):
1002- raise OverflowError (
1003- f"Not possible to cast encoded times from { num .dtype !r} to "
1004- f"{ dtype !r} without overflow. Consider removing the dtype "
1005- f"encoding, at which point xarray will make an appropriate "
1006- f"choice, or explicitly switching to a larger floating point "
1007- f"dtype."
1008- )
1009-
1010- return cast_num
1011-
1012-
1013995def encode_cf_datetime (
1014996 dates : T_DuckArray , # type: ignore[misc]
1015997 units : str | None = None ,
@@ -1032,6 +1014,27 @@ def encode_cf_datetime(
10321014 return _eagerly_encode_cf_datetime (dates , units , calendar , dtype )
10331015
10341016
1017+ def _infer_needed_units_numpy (ref_date , data_units ):
1018+ needed_units , data_ref_date = _unpack_time_unit_and_ref_date (data_units )
1019+ needed_units = _numpy_to_netcdf_timeunit (needed_units )
1020+ ref_delta = abs (data_ref_date - ref_date ).to_timedelta64 ()
1021+ data_delta = _unit_timedelta_numpy (needed_units )
1022+ if (ref_delta % data_delta ) > np .timedelta64 (0 , "ns" ):
1023+ needed_units = _infer_time_units_from_diff (ref_delta )
1024+ return needed_units
1025+
1026+
1027+ def _infer_needed_units_cftime (ref_date , data_units , calendar ):
1028+ needed_units , data_ref_date = _unpack_time_units_and_ref_date_cftime (
1029+ data_units , calendar
1030+ )
1031+ ref_delta = abs (data_ref_date - ref_date )
1032+ data_delta = _time_units_to_timedelta (needed_units )
1033+ if (ref_delta % data_delta ) > timedelta (seconds = 0 ):
1034+ needed_units = _infer_time_units_from_diff (ref_delta )
1035+ return needed_units
1036+
1037+
10351038def _eagerly_encode_cf_datetime (
10361039 dates : T_DuckArray , # type: ignore[misc]
10371040 units : str | None = None ,
@@ -1049,6 +1052,7 @@ def _eagerly_encode_cf_datetime(
10491052 if calendar is None :
10501053 calendar = infer_calendar_name (dates )
10511054
1055+ raise_incompatible_units_error = False
10521056 try :
10531057 if not _is_standard_calendar (calendar ) or dates .dtype .kind == "O" :
10541058 # parse with cftime instead
@@ -1081,16 +1085,7 @@ def _eagerly_encode_cf_datetime(
10811085 time_deltas = dates_as_index - ref_date
10821086
10831087 # retrieve needed units to faithfully encode to int64
1084- needed_unit , data_ref_date = _unpack_time_unit_and_ref_date (data_units )
1085- needed_units = _numpy_to_netcdf_timeunit (needed_unit )
1086- if data_units != units :
1087- # this accounts for differences in the reference times
1088- ref_delta = abs (data_ref_date - ref_date ).to_timedelta64 ()
1089- data_delta = np .timedelta64 (1 , needed_unit )
1090- if (ref_delta % data_delta ) > np .timedelta64 (0 , "ns" ):
1091- needed_units = _infer_time_units_from_diff (ref_delta )
1092-
1093- # needed time delta to encode faithfully to int64
1088+ needed_units = _infer_needed_units_numpy (ref_date , data_units )
10941089 needed_time_delta = _unit_timedelta_numpy (needed_units )
10951090
10961091 floor_division = np .issubdtype (dtype , np .integer ) or dtype is None
@@ -1104,7 +1099,6 @@ def _eagerly_encode_cf_datetime(
11041099 f"Set encoding['dtype'] to floating point dtype to silence this warning."
11051100 )
11061101 elif np .issubdtype (dtype , np .integer ) and allow_units_modification :
1107- floor_division = True
11081102 new_units = f"{ needed_units } since { format_timestamp (ref_date )} "
11091103 emit_user_level_warning (
11101104 f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
@@ -1114,6 +1108,10 @@ def _eagerly_encode_cf_datetime(
11141108 )
11151109 units = new_units
11161110 time_delta = needed_time_delta
1111+ floor_division = True
1112+ elif np .issubdtype (dtype , np .integer ) and not allow_units_modification :
1113+ new_units = f"{ needed_units } since { format_timestamp (ref_date )} "
1114+ raise_incompatible_units_error = True
11171115
11181116 # get resolution of TimedeltaIndex and align time_delta
11191117 # todo: check, if this works in any case
@@ -1123,14 +1121,39 @@ def _eagerly_encode_cf_datetime(
11231121 num = reshape (num .values , dates .shape )
11241122
11251123 except (OutOfBoundsDatetime , OverflowError , ValueError ):
1124+ time_units , ref_date = _unpack_time_units_and_ref_date_cftime (units , calendar )
1125+ time_delta_cftime = _time_units_to_timedelta (time_units )
1126+ needed_units = _infer_needed_units_cftime (ref_date , data_units , calendar )
1127+ needed_time_delta_cftime = _time_units_to_timedelta (needed_units )
1128+
1129+ if (
1130+ np .issubdtype (dtype , np .integer )
1131+ and time_delta_cftime > needed_time_delta_cftime
1132+ ):
1133+ new_units = f"{ needed_units } since { format_cftime_datetime (ref_date )} "
1134+ if allow_units_modification :
1135+ emit_user_level_warning (
1136+ f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
1137+ f"Serializing with units { new_units !r} instead. "
1138+ f"Set encoding['dtype'] to floating point dtype to serialize with units { units !r} . "
1139+ f"Set encoding['units'] to { new_units !r} to silence this warning ."
1140+ )
1141+ units = new_units
1142+ else :
1143+ raise_incompatible_units_error = True
1144+
11261145 num = _encode_datetime_with_cftime (dates , units , calendar )
11271146 # do it now only for cftime-based flow
11281147 # we already covered for this in pandas-based flow
11291148 num = cast_to_int_if_safe (num )
11301149
1131- if dtype is not None :
1132- # todo: check, if this is really needed for all dtypes
1133- num = _cast_to_dtype_if_safe (num , dtype )
1150+ if raise_incompatible_units_error :
1151+ raise ValueError (
1152+ f"Times can't be serialized faithfully to int64 with requested units { units !r} . "
1153+ f"Consider setting encoding['dtype'] to a floating point dtype to serialize with "
1154+ f"units { units !r} . Consider setting encoding['units'] to { new_units !r} to "
1155+ f"serialize with an integer dtype."
1156+ )
11341157
11351158 return num , units , calendar
11361159
@@ -1243,14 +1266,17 @@ def _eagerly_encode_cf_timedelta(
12431266 time_delta = needed_time_delta
12441267 time_delta = time_delta .astype (f"=m8[{ deltas_unit } ]" )
12451268 floor_division = True
1269+ elif np .issubdtype (dtype , np .integer ) and not allow_units_modification :
1270+ raise ValueError (
1271+ f"Timedeltas can't be serialized faithfully to int64 with requested units { units !r} . "
1272+ f"Consider setting encoding['dtype'] to a floating point dtype to serialize with "
1273+ f"units { units !r} . Consider setting encoding['units'] to { needed_units !r} to "
1274+ f"serialize with an integer dtype."
1275+ )
12461276
12471277 num = _division (time_deltas , time_delta , floor_division )
12481278 num = reshape (num .values , timedeltas .shape )
12491279
1250- if dtype is not None :
1251- # todo: check, if this is needed for all dtypes
1252- num = _cast_to_dtype_if_safe (num , dtype )
1253-
12541280 return num , units
12551281
12561282
@@ -1328,20 +1354,15 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
13281354
13291355 units = encoding .pop ("units" , None )
13301356 calendar = encoding .pop ("calendar" , None )
1331- dtype = encoding .pop ("dtype" , None )
1357+ dtype = encoding .get ("dtype" , None )
13321358
13331359 # in the case of packed data we need to encode into
13341360 # float first, the correct dtype will be established
13351361 # via CFScaleOffsetCoder/CFMaskCoder
1336- set_dtype_encoding = None
13371362 if "add_offset" in encoding or "scale_factor" in encoding :
1338- set_dtype_encoding = dtype
13391363 dtype = data .dtype if data .dtype .kind == "f" else "float64"
13401364 (data , units , calendar ) = encode_cf_datetime (data , units , calendar , dtype )
13411365
1342- # retain dtype for packed data
1343- if set_dtype_encoding is not None :
1344- safe_setitem (encoding , "dtype" , set_dtype_encoding , name = name )
13451366 safe_setitem (attrs , "units" , units , name = name )
13461367 safe_setitem (attrs , "calendar" , calendar , name = name )
13471368
@@ -1393,22 +1414,16 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
13931414 if np .issubdtype (variable .data .dtype , np .timedelta64 ):
13941415 dims , data , attrs , encoding = unpack_for_encoding (variable )
13951416
1396- dtype = encoding .pop ("dtype" , None )
1417+ dtype = encoding .get ("dtype" , None )
13971418
13981419 # in the case of packed data we need to encode into
13991420 # float first, the correct dtype will be established
14001421 # via CFScaleOffsetCoder/CFMaskCoder
1401- set_dtype_encoding = None
14021422 if "add_offset" in encoding or "scale_factor" in encoding :
1403- set_dtype_encoding = dtype
14041423 dtype = data .dtype if data .dtype .kind == "f" else "float64"
14051424
14061425 data , units = encode_cf_timedelta (data , encoding .pop ("units" , None ), dtype )
14071426
1408- # retain dtype for packed data
1409- if set_dtype_encoding is not None :
1410- safe_setitem (encoding , "dtype" , set_dtype_encoding , name = name )
1411-
14121427 safe_setitem (attrs , "units" , units , name = name )
14131428
14141429 return Variable (dims , data , attrs , encoding , fastpath = True )
0 commit comments