diff --git a/doc/api/index.rst b/doc/api/index.rst
index 618217468c2..d9e8b46de72 100644
--- a/doc/api/index.rst
+++ b/doc/api/index.rst
@@ -172,6 +172,7 @@ Input/output
     :toctree: generated
 
     load_dataarray
+    gmtread
 
 GMT Defaults
 ------------
diff --git a/pygmt/__init__.py b/pygmt/__init__.py
index 652c6c78712..5bdc5fbdd5f 100644
--- a/pygmt/__init__.py
+++ b/pygmt/__init__.py
@@ -24,7 +24,7 @@
 from pygmt import datasets
 from pygmt._show_versions import __commit__, __version__, show_versions
 from pygmt.figure import Figure, set_display
-from pygmt.io import load_dataarray
+from pygmt.io import gmtread, load_dataarray
 from pygmt.session_management import begin as _begin
 from pygmt.session_management import end as _end
 from pygmt.src import (
diff --git a/pygmt/helpers/testing.py b/pygmt/helpers/testing.py
index 3e47263dbae..98cedd17b8b 100644
--- a/pygmt/helpers/testing.py
+++ b/pygmt/helpers/testing.py
@@ -144,13 +144,13 @@ def wrapper(*args, ext="png", request=None, **kwargs):
     return decorator
 
 
-def load_static_earth_relief():
+def load_static_earth_relief() -> xr.DataArray:
     """
-    Load the static_earth_relief file for internal testing.
+    Load the static_earth_relief.nc file for internal testing.
 
     Returns
     -------
-    data : xarray.DataArray
+    data
         A grid of Earth relief for internal tests.
     """
     fname = which("@static_earth_relief.nc", download="c")
diff --git a/pygmt/io/__init__.py b/pygmt/io/__init__.py
new file mode 100644
index 00000000000..964a5d59a3a
--- /dev/null
+++ b/pygmt/io/__init__.py
@@ -0,0 +1,6 @@
+"""
+PyGMT input/output (I/O) utilities.
+"""
+
+from pygmt.io.gmtread import gmtread
+from pygmt.io.load_dataarray import load_dataarray
diff --git a/pygmt/io/gmtread.py b/pygmt/io/gmtread.py
new file mode 100644
index 00000000000..564354e8720
--- /dev/null
+++ b/pygmt/io/gmtread.py
@@ -0,0 +1,125 @@
+"""
+Read a file into an appropriate object.
+"""
+
+from collections.abc import Mapping, Sequence
+from pathlib import PurePath
+from typing import Any, Literal
+
+import pandas as pd
+import xarray as xr
+from pygmt.clib import Session
+from pygmt.helpers import build_arg_list, is_nonstr_iter
+from pygmt.src.which import which
+
+
+def gmtread(
+    file: str | PurePath,
+    kind: Literal["dataset", "grid", "image"],
+    region: Sequence[float] | str | None = None,
+    header: int | None = None,
+    column_names: pd.Index | None = None,
+    dtype: type | Mapping[Any, type] | None = None,
+    index_col: str | int | None = None,
+) -> pd.DataFrame | xr.DataArray:
+    """
+    Read a dataset, grid, or image from a file and return the appropriate object.
+
+    The returned object is a :class:`pandas.DataFrame` for datasets, and
+    :class:`xarray.DataArray` for grids and images.
+
+    For datasets, keyword arguments ``column_names``, ``header``, ``dtype``, and
+    ``index_col`` are supported.
+
+    Parameters
+    ----------
+    file
+        The file name to read.
+    kind
+        The kind of data to read. Valid values are ``"dataset"``, ``"grid"``, and
+        ``"image"``.
+    region
+        The region of interest. Only data within this region will be read.
+    column_names
+        A list of column names.
+    header
+        Row number containing column names. ``header=None`` means not to parse the
+        column names from table header. Ignored if the row number is larger than the
+        number of headers in the table.
+    dtype
+        Data type. Can be a single type for all columns or a dictionary mapping
+        column names to types.
+    index_col
+        Column to set as index.
+
+    Returns
+    -------
+    data
+        Return type depends on the ``kind`` argument:
+
+        - ``"dataset"``: :class:`pandas.DataFrame`
+        - ``"grid"`` or ``"image"``: :class:`xarray.DataArray`
+
+    Examples
+    --------
+    Read a dataset into a :class:`pandas.DataFrame` object:
+
+    >>> from pygmt import gmtread
+    >>> df = gmtread("@hotspots.txt", kind="dataset")
+    >>> type(df)
+    <class 'pandas.core.frame.DataFrame'>
+
+    Read a grid into an :class:`xarray.DataArray` object:
+
+    >>> dataarray = gmtread("@earth_relief_01d", kind="grid")
+    >>> type(dataarray)
+    <class 'xarray.core.dataarray.DataArray'>
+
+    Read an image into an :class:`xarray.DataArray` object:
+
+    >>> image = gmtread("@earth_day_01d", kind="image")
+    >>> type(image)
+    <class 'xarray.core.dataarray.DataArray'>
+    """
+    if kind not in {"dataset", "grid", "image"}:
+        msg = f"Invalid kind '{kind}': must be one of 'dataset', 'grid', or 'image'."
+        raise ValueError(msg)
+
+    if kind != "dataset" and any(
+        v is not None for v in [column_names, header, dtype, index_col]
+    ):
+        msg = (
+            "Only the 'dataset' kind supports the 'column_names', 'header', 'dtype', "
+            "and 'index_col' arguments."
+        )
+        raise ValueError(msg)
+
+    kwdict = {
+        "R": "/".join(f"{v}" for v in region) if is_nonstr_iter(region) else region,  # type: ignore[union-attr]
+        "T": {"dataset": "d", "grid": "g", "image": "i"}[kind],
+    }
+
+    with Session() as lib:
+        with lib.virtualfile_out(kind=kind) as voutfile:
+            lib.call_module(
+                module="read", args=[str(file), voutfile, *build_arg_list(kwdict)]
+            )
+
+            match kind:
+                case "dataset":
+                    return lib.virtualfile_to_dataset(
+                        vfname=voutfile,
+                        column_names=column_names,
+                        header=header,
+                        dtype=dtype,
+                        index_col=index_col,
+                    )
+                case "grid" | "image":
+                    raster = lib.virtualfile_to_raster(vfname=voutfile, kind=kind)
+                    # Add "source" encoding
+                    source = which(fname=file)
+                    raster.encoding["source"] = (
+                        source[0] if isinstance(source, list) else source
+                    )
+                    _ = raster.gmt  # Load GMTDataArray accessor information
+                    return raster
diff --git a/pygmt/io.py b/pygmt/io/load_dataarray.py
similarity index 97%
rename from pygmt/io.py
rename to pygmt/io/load_dataarray.py
index 0e7c560ae21..8fdf043010e 100644
--- a/pygmt/io.py
+++ b/pygmt/io/load_dataarray.py
@@ -1,5 +1,5 @@
 """
-PyGMT input/output (I/O) utilities.
+Load xarray.DataArray from a file or file-like object.
 """
 
 import warnings
diff --git a/pygmt/tests/test_datatypes_dataset.py b/pygmt/tests/test_datatypes_dataset.py
index 56f18143035..2fe2d92ac62 100644
--- a/pygmt/tests/test_datatypes_dataset.py
+++ b/pygmt/tests/test_datatypes_dataset.py
@@ -6,8 +6,7 @@
 
 import pandas as pd
 import pytest
-from pygmt import which
-from pygmt.clib import Session
+from pygmt import gmtread, which
 from pygmt.helpers import GMTTempFile
 
 
@@ -44,11 +43,7 @@ def dataframe_from_gmt(fname, **kwargs):
     """
     Read tabular data as pandas.DataFrame using GMT virtual file.
     """
-    with Session() as lib:
-        with lib.virtualfile_out(kind="dataset") as vouttbl:
-            lib.call_module("read", [fname, vouttbl, "-Td"])
-            df = lib.virtualfile_to_dataset(vfname=vouttbl, **kwargs)
-    return df
+    return gmtread(fname, kind="dataset", **kwargs)
 
 
 @pytest.mark.benchmark
diff --git a/pygmt/tests/test_io_gmtread.py b/pygmt/tests/test_io_gmtread.py
new file mode 100644
index 00000000000..46b43dabf9f
--- /dev/null
+++ b/pygmt/tests/test_io_gmtread.py
@@ -0,0 +1,70 @@
+"""
+Test the gmtread function.
+"""
+
+import importlib.util
+
+import numpy as np
+import pytest
+import xarray as xr
+from pygmt import gmtread, which
+
+# Optional dependencies: probe for them without importing, so that collecting this
+# module never fails when one of them is missing.
+_HAS_NETCDF4 = bool(importlib.util.find_spec("netCDF4"))
+_HAS_RIORASTERIO = bool(importlib.util.find_spec("rioxarray"))
+
+
+@pytest.mark.skipif(not _HAS_NETCDF4, reason="netCDF4 is not installed.")
+def test_io_gmtread_grid():
+    """
+    Test that reading a grid returns an xr.DataArray and the grid is the same as the one
+    loaded via xarray.load_dataarray.
+    """
+    grid = gmtread("@static_earth_relief.nc", kind="grid")
+    assert isinstance(grid, xr.DataArray)
+    expected_grid = xr.load_dataarray(which("@static_earth_relief.nc", download="a"))
+    assert np.allclose(grid, expected_grid)
+
+
+@pytest.mark.skipif(not _HAS_RIORASTERIO, reason="rioxarray is not installed.")
+def test_io_gmtread_image():
+    """
+    Test that reading an image returns an xr.DataArray and the image is the same as the
+    one loaded via rioxarray.open_rasterio.
+    """
+    # Imported here rather than at module level: rioxarray is optional, and the skipif
+    # marker above guarantees it is importable by the time the test body runs.
+    import rioxarray
+
+    image = gmtread("@earth_day_01d", kind="image")
+    assert isinstance(image, xr.DataArray)
+    with rioxarray.open_rasterio(
+        which("@earth_day_01d", download="a")
+    ) as expected_image:
+        assert np.allclose(image, expected_image)
+
+
+def test_io_gmtread_invalid_kind():
+    """
+    Test that an invalid kind raises a ValueError.
+    """
+    with pytest.raises(ValueError, match="Invalid kind"):
+        gmtread("file.cpt", kind="cpt")
+
+
+def test_io_gmtread_invalid_arguments():
+    """
+    Test that invalid arguments raise a ValueError for non-'dataset' kind.
+    """
+    with pytest.raises(ValueError, match="Only the 'dataset' kind supports"):
+        gmtread("file.nc", kind="grid", column_names="foo")
+
+    with pytest.raises(ValueError, match="Only the 'dataset' kind supports"):
+        gmtread("file.nc", kind="grid", header=1)
+
+    with pytest.raises(ValueError, match="Only the 'dataset' kind supports"):
+        gmtread("file.nc", kind="grid", dtype="float")
+
+    with pytest.raises(ValueError, match="Only the 'dataset' kind supports"):
+        gmtread("file.nc", kind="grid", index_col=0)
diff --git a/pygmt/tests/test_io.py b/pygmt/tests/test_io_load_dataarray.py
similarity index 100%
rename from pygmt/tests/test_io.py
rename to pygmt/tests/test_io_load_dataarray.py