Skip to content

Commit f8030d0

Browse files
Extend Cube Metadata NetCDF zlib compression (#6552)
* Extend Cube Metadata NetCDF zlib compression * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add whatsnew entry * review actions --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent e15c079 commit f8030d0

File tree

3 files changed

+200
-26
lines changed

3 files changed

+200
-26
lines changed

docs/src/whatsnew/latest.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,10 @@ This document explains the changes made to Iris for this release
5555
the :data:`iris.FUTURE` object, to avoid breaking existing code.
5656
(:issue:`3678`, :pull:`6481`, :pull:`6540`)
5757

58+
#. `@bjlittle`_ extended ``zlib`` compression of :class:`~iris.cube.Cube` data payload when saving to NetCDF
59+
to also include any auxiliary coordinates and ancillary variables with the same ``shape`` as the cube.
60+
(:issue:`6539`, :pull:`6552`)
61+
5862

5963
🐛 Bugs Fixed
6064
=============

lib/iris/fileformats/netcdf/saver.py

Lines changed: 78 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -586,21 +586,28 @@ def write(
586586
# data-vars in the file.
587587
cf_mesh_name = self._add_mesh(cube)
588588

589+
# Group the generic compression keyword arguments together for
590+
# convenience, as they will be applied to other cube metadata
591+
# as well as the cube data payload.
592+
compression_kwargs = {
593+
"complevel": complevel,
594+
"fletcher32": fletcher32,
595+
"shuffle": shuffle,
596+
"zlib": zlib,
597+
}
598+
589599
# Create the associated cube CF-netCDF data variable.
590600
cf_var_cube = self._create_cf_data_variable(
591601
cube,
592602
cube_dimensions,
593-
local_keys,
594-
zlib=zlib,
595-
complevel=complevel,
596-
shuffle=shuffle,
597-
fletcher32=fletcher32,
603+
local_keys=local_keys,
604+
packing=packing,
605+
fill_value=fill_value,
598606
contiguous=contiguous,
599607
chunksizes=chunksizes,
600608
endian=endian,
601609
least_significant_digit=least_significant_digit,
602-
packing=packing,
603-
fill_value=fill_value,
610+
**compression_kwargs,
604611
)
605612

606613
# Associate any mesh with the data-variable.
@@ -615,15 +622,19 @@ def write(
615622

616623
# Add the auxiliary coordinate variables and associate the data
617624
# variable to them
618-
self._add_aux_coords(cube, cf_var_cube, cube_dimensions)
625+
self._add_aux_coords(
626+
cube, cf_var_cube, cube_dimensions, compression_kwargs=compression_kwargs
627+
)
619628

620629
# Add the cell_measures variables and associate the data
621630
# variable to them
622631
self._add_cell_measures(cube, cf_var_cube, cube_dimensions)
623632

624633
# Add the ancillary_variables variables and associate the data variable
625634
# to them
626-
self._add_ancillary_variables(cube, cf_var_cube, cube_dimensions)
635+
self._add_ancillary_variables(
636+
cube, cf_var_cube, cube_dimensions, compression_kwargs=compression_kwargs
637+
)
627638

628639
# Add the formula terms to the appropriate cf variables for each
629640
# aux factory in the cube.
@@ -883,7 +894,14 @@ def _add_mesh(self, cube_or_mesh):
883894
return cf_mesh_name
884895

885896
def _add_inner_related_vars(
886-
self, cube, cf_var_cube, dimension_names, coordlike_elements
897+
self,
898+
cube,
899+
cf_var_cube,
900+
dimension_names,
901+
coordlike_elements,
902+
/,
903+
*,
904+
compression_kwargs=None,
887905
):
888906
"""Create a set of variables for aux-coords, ancillaries or cell-measures.
889907
@@ -913,7 +931,10 @@ def _add_inner_related_vars(
913931
if cf_name is None:
914932
# Not already present : create it
915933
cf_name = self._create_generic_cf_array_var(
916-
cube, dimension_names, element
934+
cube,
935+
dimension_names,
936+
element,
937+
compression_kwargs=compression_kwargs,
917938
)
918939
self._name_coord_map.append(cf_name, element)
919940

@@ -929,7 +950,9 @@ def _add_inner_related_vars(
929950
variable_names = " ".join(sorted(element_names))
930951
_setncattr(cf_var_cube, role_attribute_name, variable_names)
931952

932-
def _add_aux_coords(self, cube, cf_var_cube, dimension_names):
953+
def _add_aux_coords(
954+
self, cube, cf_var_cube, dimension_names, /, *, compression_kwargs=None
955+
):
933956
"""Add aux. coordinate to the dataset and associate with the data variable.
934957
935958
Parameters
@@ -940,6 +963,9 @@ def _add_aux_coords(self, cube, cf_var_cube, dimension_names):
940963
A cf variable cube representation.
941964
dimension_names : list
942965
Names associated with the dimensions of the cube.
966+
compression_kwargs : dict, optional
967+
NetCDF data compression keyword arguments.
968+
943969
"""
944970
from iris.mesh.components import (
945971
MeshEdgeCoords,
@@ -967,6 +993,7 @@ def _add_aux_coords(self, cube, cf_var_cube, dimension_names):
967993
cf_var_cube,
968994
dimension_names,
969995
coords_to_add,
996+
compression_kwargs=compression_kwargs,
970997
)
971998

972999
def _add_cell_measures(self, cube, cf_var_cube, dimension_names):
@@ -988,7 +1015,9 @@ def _add_cell_measures(self, cube, cf_var_cube, dimension_names):
9881015
cube.cell_measures(),
9891016
)
9901017

991-
def _add_ancillary_variables(self, cube, cf_var_cube, dimension_names):
1018+
def _add_ancillary_variables(
1019+
self, cube, cf_var_cube, dimension_names, /, *, compression_kwargs=None
1020+
):
9921021
"""Add ancillary variables to the dataset and associate with the data variable.
9931022
9941023
Parameters
@@ -999,12 +1028,16 @@ def _add_ancillary_variables(self, cube, cf_var_cube, dimension_names):
9991028
A cf variable cube representation.
10001029
dimension_names : list
10011030
Names associated with the dimensions of the cube.
1031+
compression_kwargs : dict, optional
1032+
NetCDF data compression keyword arguments.
1033+
10021034
"""
10031035
return self._add_inner_related_vars(
10041036
cube,
10051037
cf_var_cube,
10061038
dimension_names,
10071039
cube.ancillary_variables(),
1040+
compression_kwargs=compression_kwargs,
10081041
)
10091042

10101043
def _add_dim_coords(self, cube, dimension_names):
@@ -1439,7 +1472,7 @@ def _ensure_valid_dtype(self, values, src_name, src_object):
14391472
values = values.astype(np.int32)
14401473
return values
14411474

1442-
def _create_cf_bounds(self, coord, cf_var, cf_name):
1475+
def _create_cf_bounds(self, coord, cf_var, cf_name, /, *, compression_kwargs=None):
14431476
"""Create the associated CF-netCDF bounds variable.
14441477
14451478
Parameters
@@ -1450,13 +1483,18 @@ def _create_cf_bounds(self, coord, cf_var, cf_name):
14501483
CF-netCDF variable.
14511484
cf_name : str
14521485
Name of the CF-NetCDF variable.
1486+
compression_kwargs : dict, optional
1487+
NetCDF data compression keyword arguments.
14531488
14541489
Returns
14551490
-------
14561491
None
14571492
14581493
"""
14591494
if hasattr(coord, "has_bounds") and coord.has_bounds():
1495+
if compression_kwargs is None:
1496+
compression_kwargs = {}
1497+
14601498
# Get the values in a form which is valid for the file format.
14611499
bounds = self._ensure_valid_dtype(
14621500
coord.core_bounds(), "the bounds of coordinate", coord
@@ -1489,6 +1527,7 @@ def _create_cf_bounds(self, coord, cf_var, cf_name):
14891527
boundsvar_name,
14901528
bounds.dtype.newbyteorder("="),
14911529
cf_var.dimensions + (bounds_dimension_name,),
1530+
**compression_kwargs,
14921531
)
14931532
self._lazy_stream_data(data=bounds, cf_var=cf_var_bounds)
14941533

@@ -1685,8 +1724,11 @@ def _create_generic_cf_array_var(
16851724
cube_or_mesh,
16861725
cube_dim_names,
16871726
element,
1727+
/,
1728+
*,
16881729
element_dims=None,
16891730
fill_value=None,
1731+
compression_kwargs=None,
16901732
):
16911733
"""Create the CF-netCDF variable given dimensional_metadata.
16921734
@@ -1718,6 +1760,8 @@ def _create_generic_cf_array_var(
17181760
If not set, standard netcdf4-python behaviour : the variable has no
17191761
'_FillValue' property, and uses the "standard" fill-value for its
17201762
type.
1763+
compression_kwargs : dict, optional
1764+
NetCDF data compression keyword arguments.
17211765
17221766
Returns
17231767
-------
@@ -1732,6 +1776,9 @@ def _create_generic_cf_array_var(
17321776
else:
17331777
cube = None
17341778

1779+
if compression_kwargs is None:
1780+
compression_kwargs = {}
1781+
17351782
# Work out the var-name to use.
17361783
# N.B. the only part of this routine that may use a mesh _or_ a cube.
17371784
cf_name = self._get_coord_variable_name(cube_or_mesh, element)
@@ -1749,6 +1796,9 @@ def _create_generic_cf_array_var(
17491796
# (e.g. =points if a coord, =data if an ancillary, etc)
17501797
data = element._core_values()
17511798

1799+
if cube is None or cube.shape != data.shape:
1800+
compression_kwargs = {}
1801+
17521802
if np.issubdtype(data.dtype, np.str_):
17531803
# Deal with string-type variables.
17541804
# Typically CF label variables, but also possibly ancil-vars ?
@@ -1811,6 +1861,7 @@ def _create_generic_cf_array_var(
18111861
data.dtype.newbyteorder("="),
18121862
element_dims,
18131863
fill_value=fill_value,
1864+
**compression_kwargs,
18141865
)
18151866

18161867
# Add the axis attribute for spatio-temporal CF-netCDF coordinates.
@@ -1820,7 +1871,9 @@ def _create_generic_cf_array_var(
18201871
_setncattr(cf_var, "axis", axis.upper())
18211872

18221873
# Create the associated CF-netCDF bounds variable, if any.
1823-
self._create_cf_bounds(element, cf_var, cf_name)
1874+
self._create_cf_bounds(
1875+
element, cf_var, cf_name, compression_kwargs=compression_kwargs
1876+
)
18241877

18251878
# Add the data to the CF-netCDF variable.
18261879
self._lazy_stream_data(data=data, cf_var=cf_var)
@@ -2784,16 +2837,16 @@ def is_valid_packspec(p):
27842837
for cube, packspec, fill_value in zip(cubes, packspecs, fill_values):
27852838
sman.write(
27862839
cube,
2787-
local_keys,
2788-
unlimited_dimensions,
2789-
zlib,
2790-
complevel,
2791-
shuffle,
2792-
fletcher32,
2793-
contiguous,
2794-
chunksizes,
2795-
endian,
2796-
least_significant_digit,
2840+
local_keys=local_keys,
2841+
unlimited_dimensions=unlimited_dimensions,
2842+
zlib=zlib,
2843+
complevel=complevel,
2844+
shuffle=shuffle,
2845+
fletcher32=fletcher32,
2846+
contiguous=contiguous,
2847+
chunksizes=chunksizes,
2848+
endian=endian,
2849+
least_significant_digit=least_significant_digit,
27972850
packing=packspec,
27982851
fill_value=fill_value,
27992852
)

0 commit comments

Comments
 (0)