-
Notifications
You must be signed in to change notification settings - Fork 234
Allow passing region to GMTBackendEntrypoint.open_dataset #3932
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 6 commits
12c2662
3114987
72abcaf
3a04239
1a5837a
fe6bd44
6dec9ad
c2c010c
07b2802
5557b33
0006f2e
deb2df0
221b60c
11436ad
f8af963
ba73095
36fb6e0
ae785c7
c176154
b488b10
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,10 +7,8 @@ | |
| from typing import Any, Literal, NamedTuple | ||
|
|
||
| import xarray as xr | ||
| from pygmt.clib import Session | ||
| from pygmt.exceptions import GMTInvalidInput | ||
| from pygmt.helpers import build_arg_list, kwargs_to_strings | ||
| from pygmt.src import which | ||
| from pygmt.helpers import kwargs_to_strings | ||
|
|
||
| with contextlib.suppress(ImportError): | ||
| # rioxarray is needed to register the rio accessor | ||
|
|
@@ -581,22 +579,9 @@ def _load_remote_dataset( | |
| raise GMTInvalidInput(msg) | ||
|
|
||
| fname = f"@{prefix}_{resolution}_{reg}" | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I see a lot of error messages like: This is because, in the GMT backend, we use something like
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, we used to do this: # Full path to the grid if not tiled grids.
source = which(fname, download="a") if not resinfo.tiled else None
# Manually add source to xarray.DataArray encoding to make the GMT accessors work.
if source:
grid.encoding["source"] = sourcei.e. only add the source for non-tiled grids, so that the accessor's
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I think either works. Perhaps
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done in commit 5557b33. Edit: Also just realized that |
||
| kwdict = {"R": region, "T": {"grid": "g", "image": "i"}[dataset.kind]} | ||
| with Session() as lib: | ||
| with lib.virtualfile_out(kind=dataset.kind) as voutgrd: | ||
| lib.call_module( | ||
| module="read", | ||
| args=[fname, voutgrd, *build_arg_list(kwdict)], | ||
| ) | ||
| grid = lib.virtualfile_to_raster( | ||
| kind=dataset.kind, outgrid=None, vfname=voutgrd | ||
| ) | ||
|
|
||
| # Full path to the grid if not tiled grids. | ||
| source = which(fname, download="a") if not resinfo.tiled else None | ||
| # Manually add source to xarray.DataArray encoding to make the GMT accessors work. | ||
| if source: | ||
| grid.encoding["source"] = source | ||
| grid = xr.load_dataarray( | ||
| fname, engine="gmt", raster_kind=dataset.kind, region=region | ||
| ) | ||
|
|
||
| # Add some metadata to the grid | ||
| grid.attrs["description"] = dataset.description | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -144,7 +144,7 @@ def test_xarray_accessor_grid_source_file_not_exist(): | |
| # Registration and gtype are correct. | ||
| assert grid.gmt.registration == GridRegistration.PIXEL | ||
| assert grid.gmt.gtype == GridType.GEOGRAPHIC | ||
| # The source grid file is undefined. | ||
| # The source grid file is undefined for tiled grids. | ||
| assert grid.encoding.get("source") is None | ||
|
||
|
|
||
| # For a sliced grid, fallback to default registration and gtype, because the source | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -40,8 +40,8 @@ def test_xarray_backend_load_dataarray(): | |
|
|
||
| def test_xarray_backend_gmt_open_nc_grid(): | ||
| """ | ||
| Ensure that passing engine='gmt' to xarray.open_dataarray works for opening NetCDF | ||
| grids. | ||
| Ensure that passing engine='gmt' to xarray.open_dataarray works to open a netCDF | ||
| grid. | ||
| """ | ||
| with xr.open_dataarray( | ||
| "@static_earth_relief.nc", engine="gmt", raster_kind="grid" | ||
|
|
@@ -52,10 +52,29 @@ def test_xarray_backend_gmt_open_nc_grid(): | |
| assert da.gmt.registration == GridRegistration.PIXEL | ||
|
|
||
|
|
||
| def test_xarray_backend_gmt_open_nc_grid_with_region_bbox(): | ||
| """ | ||
| Ensure that passing engine='gmt' with a `region` argument to xarray.open_dataarray | ||
| works to open a netCDF grid over a specific bounding box. | ||
| """ | ||
| with xr.open_dataarray( | ||
| "@static_earth_relief.nc", | ||
| engine="gmt", | ||
| raster_kind="grid", | ||
| region=[-52, -48, -18, -12], | ||
| ) as da: | ||
| assert da.sizes == {"lat": 6, "lon": 4} | ||
| npt.assert_allclose(da.lat, [-17.5, -16.5, -15.5, -14.5, -13.5, -12.5]) | ||
| npt.assert_allclose(da.lon, [-51.5, -50.5, -49.5, -48.5]) | ||
| assert da.dtype == "float32" | ||
| assert da.gmt.gtype == GridType.GEOGRAPHIC | ||
| assert da.gmt.registration == GridRegistration.PIXEL | ||
|
|
||
|
|
||
| def test_xarray_backend_gmt_open_tif_image(): | ||
| """ | ||
| Ensure that passing engine='gmt' to xarray.open_dataarray works for opening GeoTIFF | ||
| images. | ||
| Ensure that passing engine='gmt' to xarray.open_dataarray works to open a GeoTIFF | ||
| image. | ||
| """ | ||
| with xr.open_dataarray("@earth_day_01d", engine="gmt", raster_kind="image") as da: | ||
| assert da.sizes == {"band": 3, "y": 180, "x": 360} | ||
|
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Coordinate names are |
||
|
|
@@ -64,6 +83,22 @@ def test_xarray_backend_gmt_open_tif_image(): | |
| assert da.gmt.registration == GridRegistration.PIXEL | ||
|
|
||
|
|
||
| def test_xarray_backend_gmt_open_tif_image_with_region_iso(): | ||
| """ | ||
| Ensure that passing engine='gmt' with a `region` argument to xarray.open_dataarray | ||
| works to open a GeoTIFF image over a specific ISO country code border. | ||
| """ | ||
| with xr.open_dataarray( | ||
| "@earth_day_01d", engine="gmt", raster_kind="image", region="BN" | ||
| ) as da: | ||
| assert da.sizes == {"band": 3, "lat": 2, "lon": 2} | ||
| npt.assert_allclose(da.lat, [5.5, 4.5]) | ||
| npt.assert_allclose(da.lon, [114.5, 115.5]) | ||
| assert da.dtype == "uint8" | ||
| assert da.gmt.gtype == GridType.GEOGRAPHIC | ||
| assert da.gmt.registration == GridRegistration.PIXEL | ||
|
|
||
|
|
||
| def test_xarray_backend_gmt_load_grd_grid(): | ||
| """ | ||
| Ensure that passing engine='gmt' to xarray.load_dataarray works for loading GRD | ||
|
|
@@ -88,9 +123,7 @@ def test_xarray_backend_gmt_read_invalid_kind(): | |
| """ | ||
| with pytest.raises( | ||
| TypeError, | ||
| match=re.escape( | ||
| "GMTBackendEntrypoint.open_dataset() missing 1 required keyword-only argument: 'raster_kind'" | ||
| ), | ||
| match=re.escape("missing a required argument: 'raster_kind'"), | ||
| ): | ||
| xr.open_dataarray("nokind.nc", engine="gmt") | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -2,13 +2,14 @@ | |||||||||||||||||||||||
| An xarray backend for reading raster grid/image files using the 'gmt' engine. | ||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| from collections.abc import Sequence | ||||||||||||||||||||||||
| from typing import Literal | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| import xarray as xr | ||||||||||||||||||||||||
| from pygmt._typing import PathLike | ||||||||||||||||||||||||
| from pygmt.clib import Session | ||||||||||||||||||||||||
| from pygmt.exceptions import GMTInvalidInput | ||||||||||||||||||||||||
| from pygmt.helpers import build_arg_list | ||||||||||||||||||||||||
| from pygmt.helpers import build_arg_list, kwargs_to_strings | ||||||||||||||||||||||||
| from pygmt.src.which import which | ||||||||||||||||||||||||
| from xarray.backends import BackendEntrypoint | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
|
|
@@ -71,15 +72,17 @@ class GMTBackendEntrypoint(BackendEntrypoint): | |||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| description = "Open raster (.grd, .nc or .tif) files in Xarray via GMT." | ||||||||||||||||||||||||
| open_dataset_parameters = ("filename_or_obj", "raster_kind") | ||||||||||||||||||||||||
| open_dataset_parameters = ("filename_or_obj", "raster_kind", "region") | ||||||||||||||||||||||||
| url = "https://pygmt.org/dev/api/generated/pygmt.GMTBackendEntrypoint.html" | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| @kwargs_to_strings(region="sequence") | ||||||||||||||||||||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've been think if we should avoid using the
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Probably best to open a separate issue/PR for this. |
||||||||||||||||||||||||
| def open_dataset( # type: ignore[override] | ||||||||||||||||||||||||
| self, | ||||||||||||||||||||||||
| filename_or_obj: PathLike, | ||||||||||||||||||||||||
| *, | ||||||||||||||||||||||||
| drop_variables=None, # noqa: ARG002 | ||||||||||||||||||||||||
| raster_kind: Literal["grid", "image"], | ||||||||||||||||||||||||
| region: Sequence[float] | str | None = None, | ||||||||||||||||||||||||
| # other backend specific keyword arguments | ||||||||||||||||||||||||
| # `chunks` and `cache` DO NOT go here, they are handled by xarray | ||||||||||||||||||||||||
| ) -> xr.Dataset: | ||||||||||||||||||||||||
|
|
@@ -94,14 +97,17 @@ def open_dataset( # type: ignore[override] | |||||||||||||||||||||||
| :gmt-docs:`reference/features.html#grid-file-format`. | ||||||||||||||||||||||||
| raster_kind | ||||||||||||||||||||||||
| Whether to read the file as a "grid" (single-band) or "image" (multi-band). | ||||||||||||||||||||||||
| region | ||||||||||||||||||||||||
| Optional. The subregion of the grid or image to load, in the form of a | ||||||||||||||||||||||||
| sequence [*xmin*, *xmax*, *ymin*, *ymax*] or an ISO country code. | ||||||||||||||||||||||||
weiji14 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||
| if raster_kind not in {"grid", "image"}: | ||||||||||||||||||||||||
| msg = f"Invalid raster kind: '{raster_kind}'. Valid values are 'grid' or 'image'." | ||||||||||||||||||||||||
| raise GMTInvalidInput(msg) | ||||||||||||||||||||||||
|
|
||||||||||||||||||||||||
| with Session() as lib: | ||||||||||||||||||||||||
| with lib.virtualfile_out(kind=raster_kind) as voutfile: | ||||||||||||||||||||||||
| kwdict = {"T": {"grid": "g", "image": "i"}[raster_kind]} | ||||||||||||||||||||||||
| kwdict = {"R": region, "T": {"grid": "g", "image": "i"}[raster_kind]} | ||||||||||||||||||||||||
| lib.call_module( | ||||||||||||||||||||||||
| module="read", | ||||||||||||||||||||||||
| args=[filename_or_obj, voutfile, *build_arg_list(kwdict)], | ||||||||||||||||||||||||
|
|
@@ -111,7 +117,7 @@ def open_dataset( # type: ignore[override] | |||||||||||||||||||||||
| vfname=voutfile, kind=raster_kind | ||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||
| # Add "source" encoding | ||||||||||||||||||||||||
| source = which(fname=filename_or_obj) | ||||||||||||||||||||||||
| source: str | list = which(fname=filename_or_obj) | ||||||||||||||||||||||||
| raster.encoding["source"] = ( | ||||||||||||||||||||||||
| source[0] if isinstance(source, list) else source | ||||||||||||||||||||||||
weiji14 marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||||||||||||||||||||||||
| ) | ||||||||||||||||||||||||
|
||||||||||||||||||||||||
| # Set GMT accessors. | |
| # Must put at the end, otherwise info gets lost after certain grid operations. | |
| grid.gmt.registration = header.registration | |
| grid.gmt.gtype = header.gtype | |
| return grid |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
that's a good point. please try remove it and see if everything works fine.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is one extra test failure at https://github.com/GenericMappingTools/pygmt/actions/runs/14743641047/job/41386753150#step:10:1049:
_____________ [doctest] pygmt.xarray.accessor.GMTDataArrayAccessor _____________
026 Examples
027 --------
028 For GMT's built-in remote datasets, these GMT-specific properties are automatically
029 determined and you can access them as follows:
030
031 >>> from pygmt.datasets import load_earth_relief
032 >>> # Use the global Earth relief grid with 1 degree spacing
033 >>> grid = load_earth_relief(resolution="01d", registration="pixel")
034 >>> # See if grid uses Gridline or Pixel registration
035 >>> grid.gmt.registration
Expected:
<GridRegistration.PIXEL: 1>
Got:
1I think this might be because the _GMT_GRID_HEADER.registration property returns an int instead of an enum?
pygmt/pygmt/datatypes/header.py
Lines 81 to 82 in d29303b
| # Grid registration, 0 for gridline and 1 for pixel | |
| ("registration", ctp.c_uint32), |
and we overrode grid.gmt.registration with 1 instead of <GridRegistration.PIXEL: 1>. Should be a quick fix we can do in a separate PR. Edit: no, the GMTDataArrayAccessor registration property should always return an enum, not an int, something else in this PR seems to be affecting this doctest...
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Making the following changes should fix the issue:
pygmt/pygmt/xarray/accessor.py
Lines 139 to 142 in 33fadc3
| with contextlib.suppress(ValueError): | |
| self._registration, self._gtype = map( # type: ignore[assignment] | |
| int, grdinfo(_source, per_column="n").split()[-2:] | |
| ) |
if (_source := self._obj.encoding.get("source")) and Path(_source).exists():
with contextlib.suppress(ValueError):
registration, gtype = map( # type: ignore[assignment]
int, grdinfo(_source, per_column="n").split()[-2:]
)
self._registration = GridRegistration(registration)
self._gtype = GridType(gtype)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done in commit 07b2802, and also we can remove the # type: ignore[assignment] mypy skip 🎉
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we should declare it an uncaught bug of the previous implementation. In our tests, we have checks like
assert grid.gmt.registration == GridRegistration.GRIDLINE
It's not enough, since the assertion is true when grid.gmt.registration is GridRegistration.GRIDLINE or 0. I think we should improve the existing tests and ensure that .registration and .gtype are in enums, not int, and this should be done in a separate PR.
The test below shows the current, inconsistent behavior:
>>> from pygmt.datasets import load_earth_relief
>>> grid = load_earth_relief(resolution="01d", registration="pixel")
>>> grid.gmt.registration
<GridRegistration.PIXEL: 1>
>>> type(grid.gmt.registration)
<enum 'GridRegistration'>
>>> grid2 = grid[0:10, 0:10]
>>> grid2.gmt.registration
1
>>> type(grid2.gmt.registration)
int
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems like enum comparison should be done using is instead of == according to https://docs.python.org/3/howto/enum.html#comparisons (see also https://stackoverflow.com/questions/25858497/should-enum-instances-be-compared-by-identity-or-equality).
assert grid.gmt.registration is enums.GridRegistration.PIXEL # OK
assert 1 is enums.GridRegistration.PIXEL # AssertionError<>:1: SyntaxWarning: "is" with a literal. Did you mean "=="?Wish there was a ruff lint for this (xref astral-sh/ruff#11617), but until then, will need to fix it manually.
Edit: PR at #3942
Uh oh!
There was an error while loading. Please reload this page.