pydata · dcherian · Oct 15, 2025 · Jul 13, 2025 · Jul 13, 2025 · Jul 13, 2025
diff --git a/xarray/backends/api.py b/xarray/backends/api.py
@@ -35,7 +35,8 @@
 from xarray.core.utils import emit_user_level_warning, is_remote_uri
 from xarray.namedarray.daskmanager import DaskManager
 from xarray.namedarray.parallelcompat import guess_chunkmanager
-from xarray.structure.chunks import _get_chunk, _maybe_chunk
+from xarray.namedarray.utils import _get_chunk
+from xarray.structure.chunks import _maybe_chunk
 from xarray.structure.combine import (
     _infer_concat_order_from_positions,
     _nested_combine,
@@ -244,7 +245,16 @@ def _chunk_ds(
 
     variables = {}
     for name, var in backend_ds.variables.items():
-        var_chunks = _get_chunk(var, chunks, chunkmanager)
+        if var._in_memory:
+            variables[name] = var
+            continue
+        var_chunks = _get_chunk(
+            var._data,
+            chunks,
+            chunkmanager,
+            preferred_chunks=var.encoding.get("preferred_chunks", {}),
+            dims=var.dims,
+        )
         variables[name] = _maybe_chunk(
             name,
             var,

diff --git a/xarray/namedarray/daskmanager.py b/xarray/namedarray/daskmanager.py
@@ -264,3 +264,9 @@ def shuffle(
         if chunks != "auto":
             raise NotImplementedError("Only chunks='auto' is supported at present.")
         return dask.array.shuffle(x, indexer, axis, chunks="auto")
+
+    def get_auto_chunk_size(self) -> int:
+        """Return dask's ``array.chunk-size`` config value as a number of bytes."""
+        from dask import config as dask_config
+        from dask.utils import parse_bytes
+        return parse_bytes(dask_config.get("array.chunk-size"))
diff --git a/xarray/namedarray/parallelcompat.py b/xarray/namedarray/parallelcompat.py
@@ -346,7 +346,14 @@ def rechunk(
         dask.array.Array.rechunk
         cubed.Array.rechunk
         """
-        return data.rechunk(chunks, **kwargs)
+        from xarray.core.common import _contains_cftime_datetimes
+        from xarray.namedarray.utils import _get_chunk
+
+        if _contains_cftime_datetimes(data):
+            chunks2 = _get_chunk(data, chunks, self, preferred_chunks={})  # type: ignore[arg-type]
+        else:
+            chunks2 = chunks  # type: ignore[assignment]
+        return data.rechunk(chunks2, **kwargs)
 
     @abstractmethod
     def compute(
@@ -746,3 +753,27 @@ def store(
         cubed.store
         """
         raise NotImplementedError()
+
+    def get_auto_chunk_size(
+        self,
+    ) -> int:
+        """
+        Get the default chunk size for a variable.
+
+        This is used to determine the chunk size when opening a dataset with
+        ``chunks="auto"`` or when rechunking an array with ``chunks="auto"``.
+
+        Returns
+        -------
+        chunk_size : int
+            The chunk size in bytes.
+
+        Raises
+        ------
+        NotImplementedError
+            If the chunk manager does not override this method.
+        """
+
+        raise NotImplementedError(
+            "For 'auto' rechunking of cftime arrays, get_auto_chunk_size must be implemented by the chunk manager"
+        )
diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py
@@ -1,9 +1,12 @@
 from __future__ import annotations
 
 import importlib
+import itertools
+import sys
 import warnings
 from collections.abc import Hashable, Iterable, Iterator, Mapping
 from functools import lru_cache
+from numbers import Number
 from typing import TYPE_CHECKING, Any, TypeVar, cast
 
 import numpy as np
@@ -23,7 +26,9 @@
         DaskArray = NDArray  # type: ignore[assignment, misc]
         DaskCollection: Any = NDArray  # type: ignore[no-redef]
 
-    from xarray.namedarray._typing import _Dim, duckarray
+    from xarray.core.types import T_ChunkDim
+    from xarray.namedarray._typing import DuckArray, _Dim, duckarray
+    from xarray.namedarray.parallelcompat import ChunkManagerEntrypoint
 
 
 K = TypeVar("K")
@@ -195,6 +200,106 @@ def either_dict_or_kwargs(
     return pos_kwargs
 
 
+def _get_chunk(  # type: ignore[no-untyped-def]
+    data: DuckArray[Any],
+    chunks,
+    chunkmanager: ChunkManagerEntrypoint[Any],
+    *,
+    preferred_chunks,
+    dims=None,
+) -> Mapping[Any, T_ChunkDim]:
+    """
+    Return map from each dim to chunk sizes, accounting for backend's preferred chunks.
+    """
+    from xarray.core.common import _contains_cftime_datetimes
+    from xarray.core.utils import emit_user_level_warning
+    from xarray.structure.chunks import _get_breaks_cached
+    # NOTE(review): when dims is None, chunks must be a mapping; "auto" or a number has no .keys() — confirm callers.
+    dims = chunks.keys() if dims is None else dims
+    shape = data.shape
+
+    # Determine the explicit requested chunks.
+    preferred_chunk_shape = tuple(
+        itertools.starmap(preferred_chunks.get, zip(dims, shape, strict=True))
+    )
+    if isinstance(chunks, Number) or (chunks == "auto"):
+        chunks = dict.fromkeys(dims, chunks)
+    chunk_shape = tuple(
+        chunks.get(dim, None) or preferred_chunk_sizes
+        for dim, preferred_chunk_sizes in zip(dims, preferred_chunk_shape, strict=True)
+    )
+    # cftime data is sized as float64 against a rescaled byte limit (see fake_target_chunksize).
+    limit: int | None
+    if _contains_cftime_datetimes(data):
+        limit, dtype = fake_target_chunksize(data, chunkmanager.get_auto_chunk_size())
+    else:
+        limit = None
+        dtype = data.dtype
+
+    chunk_shape = chunkmanager.normalize_chunks(
+        chunk_shape,
+        shape=shape,
+        dtype=dtype,
+        limit=limit,
+        previous_chunks=preferred_chunk_shape,
+    )
+
+    # Warn where requested chunks break preferred chunks, provided that the variable
+    # contains data.
+    if data.size:  # type: ignore[unused-ignore,attr-defined]  # DuckArray protocol doesn't include 'size' - should it?
+        for dim, size, chunk_sizes in zip(dims, shape, chunk_shape, strict=True):
+            if preferred_chunk_sizes := preferred_chunks.get(dim):
+                disagreement = _get_breaks_cached(
+                    size=size,
+                    chunk_sizes=chunk_sizes,
+                    preferred_chunk_sizes=preferred_chunk_sizes,
+                )
+                if disagreement:
+                    emit_user_level_warning(
+                        "The specified chunks separate the stored chunks along "
+                        f'dimension "{dim}" starting at index {disagreement}. This could '
+                        "degrade performance. Instead, consider rechunking after loading.",
+                    )
+
+    return dict(zip(dims, chunk_shape, strict=True))
+
+
+def fake_target_chunksize(
+    data: DuckArray[Any],
+    limit: int,
+) -> tuple[int, np.dtype[Any]]:
+    """
+    The `normalize_chunks` algorithm takes a size `limit` in bytes, but will not
+    work for object dtypes.  So we rescale the `limit` to an appropriate one based
+    on `float64` dtype, and return it with that dtype for use in `normalize_chunks`.
+
+    Parameters
+    ----------
+    data : Variable or ChunkedArray
+        The data for which we want to determine chunk sizes.
+    limit : int
+        The target chunk size in bytes. Passed to the chunk manager's `normalize_chunks` method.
+    """
+
+    # Short circuit for data that does not contain cftime datetimes
+    from xarray.core.common import _contains_cftime_datetimes
+
+    if not _contains_cftime_datetimes(data):
+        return limit, data.dtype
+
+    from xarray.core.formatting import first_n_items
+
+    output_dtype = np.dtype(np.float64)
+    # NOTE(review): sys.getsizeof is shallow and measures whatever first_n_items returns — confirm it approximates one element.
+    nbytes_approx: int = sys.getsizeof(first_n_items(data, 1))  # type: ignore[no-untyped-call]
+
+    f64_nbytes = output_dtype.itemsize
+
+    limit = int(limit * (f64_nbytes / nbytes_approx))
+
+    return limit, output_dtype
+
+
 class ReprObject:
     """Object that prints as the given value, for use with sentinel values."""
 

diff --git a/xarray/structure/chunks.py b/xarray/structure/chunks.py
@@ -7,12 +7,10 @@
 import itertools
 from collections.abc import Hashable, Mapping
 from functools import lru_cache
-from numbers import Number
 from typing import TYPE_CHECKING, Any, Literal, TypeVar, Union, overload
 
 from xarray.core import utils
-from xarray.core.utils import emit_user_level_warning
-from xarray.core.variable import IndexVariable, Variable
+from xarray.core.variable import Variable
 from xarray.namedarray.parallelcompat import (
     ChunkManagerEntrypoint,
     get_chunked_array_type,
@@ -23,6 +21,7 @@
     from xarray.core.dataarray import DataArray
     from xarray.core.dataset import Dataset
     from xarray.core.types import T_ChunkDim
+    from xarray.core.variable import Variable
 
     MissingCoreDimOptions = Literal["raise", "copy", "drop"]
 
@@ -62,54 +61,6 @@ def _get_breaks_cached(
         return None
 
 
-def _get_chunk(var: Variable, chunks, chunkmanager: ChunkManagerEntrypoint):
-    """
-    Return map from each dim to chunk sizes, accounting for backend's preferred chunks.
-    """
-    if isinstance(var, IndexVariable):
-        return {}
-    dims = var.dims
-    shape = var.shape
-
-    # Determine the explicit requested chunks.
-    preferred_chunks = var.encoding.get("preferred_chunks", {})
-    preferred_chunk_shape = tuple(
-        itertools.starmap(preferred_chunks.get, zip(dims, shape, strict=True))
-    )
-    if isinstance(chunks, Number) or (chunks == "auto"):
-        chunks = dict.fromkeys(dims, chunks)
-    chunk_shape = tuple(
-        chunks.get(dim, None) or preferred_chunk_sizes
-        for dim, preferred_chunk_sizes in zip(dims, preferred_chunk_shape, strict=True)
-    )
-
-    chunk_shape = chunkmanager.normalize_chunks(
-        chunk_shape, shape=shape, dtype=var.dtype, previous_chunks=preferred_chunk_shape
-    )
-
-    # Warn where requested chunks break preferred chunks, provided that the variable
-    # contains data.
-    if var.size:
-        for dim, size, chunk_sizes in zip(dims, shape, chunk_shape, strict=True):
-            try:
-                preferred_chunk_sizes = preferred_chunks[dim]
-            except KeyError:
-                continue
-            disagreement = _get_breaks_cached(
-                size=size,
-                chunk_sizes=chunk_sizes,
-                preferred_chunk_sizes=preferred_chunk_sizes,
-            )
-            if disagreement:
-                emit_user_level_warning(
-                    "The specified chunks separate the stored chunks along "
-                    f'dimension "{dim}" starting at index {disagreement}. This could '
-                    "degrade performance. Instead, consider rechunking after loading.",
-                )
-
-    return dict(zip(dims, chunk_shape, strict=True))
-
-
 def _maybe_chunk(
     name: Hashable,
     var: Variable,

diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py
@@ -60,6 +60,7 @@
 from xarray.coding.variables import SerializationWarning
 from xarray.conventions import encode_dataset_coordinates
 from xarray.core import indexing
+from xarray.core.common import _contains_cftime_datetimes
 from xarray.core.indexes import PandasIndex
 from xarray.core.options import set_options
 from xarray.core.types import PDDatetimeUnitOptions
@@ -6238,6 +6239,32 @@ def test_open_multi_dataset(self) -> None:
             ) as actual:
                 assert_identical(expected, actual)
 
+    @requires_cftime
+    def test_open_dataset_cftime_autochunk(self) -> None:
+        """Round-trip a dataset of cftime datetimes with ``chunks="auto"`` and check
+        that ``time_bnds`` comes back as a ``da.Array`` and the data is unchanged."""
+        import cftime
+
+        original = xr.Dataset(
+            {
+                "foo": ("time", [0.0]),
+                "time_bnds": (
+                    ("time", "bnds"),
+                    [
+                        [
+                            cftime.Datetime360Day(2005, 12, 1, 0, 0, 0, 0),
+                            cftime.Datetime360Day(2005, 12, 2, 0, 0, 0, 0),
+                        ]
+                    ],
+                ),
+            },
+            {"time": [cftime.Datetime360Day(2005, 12, 1, 12, 0, 0, 0)]},
+        )
+        with self.roundtrip(original, open_kwargs={"chunks": "auto"}) as actual:
+            assert isinstance(actual.time_bnds.variable.data, da.Array)
+            assert _contains_cftime_datetimes(actual.time)
+            assert_identical(original, actual)
+
     # Flaky test. Very open to contributions on fixing this
     @pytest.mark.flaky
     def test_dask_roundtrip(self) -> None:

diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py
@@ -1161,6 +1161,21 @@ def test_auto_chunk_da(obj):
     assert actual.chunks == expected.chunks
 
 
+def test_auto_chunk_da_cftime():
+    yrs = np.arange(2000, 2120)
+    cftime_dates = xr.date_range(
+        start=f"{yrs[0]}-01-01", end=f"{yrs[-1]}-12-31", freq="1YE", use_cftime=True
+    )
+    yr_array = np.tile(cftime_dates.values, (10, 1))  # 10 identical rows of the dates
+    da = xr.DataArray(
+        yr_array, dims=["x", "t"], coords={"x": np.arange(10), "t": cftime_dates}
+    ).chunk({"x": 4, "t": 5})
+    actual = da.chunk("auto").data  # re-chunk the cftime-valued array automatically
+    expected = da.data.rechunk({0: 10, 1: 120})  # expected "auto" result: 10 along x, 120 along t
+    np.testing.assert_array_equal(actual, expected)
+    assert actual.chunks == expected.chunks
+
+
 def test_map_blocks_error(map_da, map_ds):
     def bad_func(darray):
         return (darray * darray.x + 5 * darray.y)[:1, :1]