diff --git a/Project.toml b/Project.toml index 5c03866a..99ddbbef 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Rasters" uuid = "a3a2b9e3-a471-40c9-b274-f788e487c689" -authors = ["Rafael Schouten "] version = "0.14.6" +authors = ["Rafael Schouten "] [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" @@ -16,15 +16,18 @@ FillArrays = "1a297f60-69ca-5386-bcde-b61e274b549b" Flatten = "4c728ea3-d9ee-5c9a-9642-b6f7d7dc04fa" GeoFormatTypes = "68eda718-8dee-11e9-39e7-89f7f65f511f" GeoInterface = "cf35fbd7-0cd7-5166-be24-54bfbe79505f" +GeometryOps = "3251bfac-6a57-4b6d-aa61-ac1fef2975ab" GeometryOpsCore = "05efe853-fabf-41c8-927e-7063c8b9f013" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" Missings = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" +OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca" RecipesBase = "3cdcf5f2-1ef4-517c-9805-6587b60abb01" Reexport = "189a3867-3050-52da-a836-e630ba90ab69" Setfield = "efcf1570-3423-57d1-acb7-fd33fddbac46" +SortTileRecursiveTree = "746ee33f-1797-42c2-866d-db2fce69d14d" Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" [weakdeps] @@ -69,11 +72,13 @@ GeoDataFrames = "0.3" GeoFormatTypes = "0.4" GeoInterface = "1.0" GeometryBasics = "0.4, 0.5" +GeometryOps = "0.1.24" GeometryOpsCore = "0.1.1" Makie = "0.20, 0.21, 0.22, 0.23, 0.24" Missings = "0.4, 1" NCDatasets = "0.13, 0.14" OffsetArrays = "1" +OrderedCollections = "1.8.1" Plots = "1" ProgressMeter = "1" Proj = "1.7.2" @@ -83,6 +88,7 @@ Reexport = "0.2, 1.0" SafeTestsets = "0.1" Setfield = "0.6, 0.7, 0.8, 1" Shapefile = "0.10, 0.11" +SortTileRecursiveTree = "0.1.3" Statistics = "1" StatsBase = "0.34" Test = "1" @@ -100,6 +106,7 @@ GeoDataFrames = "62cb38b5-d8d2-4862-a48e-6a340996859f" GeometryBasics = "5c1252a2-5f33-56bf-86c9-59e7332b4326" Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a" NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" +NaturalEarth = "436b0209-26ab-4e65-94a9-6526d86fea76" Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80" Proj = "c94c279d-25a6-4763-9509-64d165bea63e" RasterDataSources = "3cb90ccd-e1b6-4867-9617-4276c8b2ca36" @@ -111,4 +118,4 @@ StatsBase = "2913bbd2-ae8a-5f71-8c99-4fb6c76f3a91" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", "GeoDataFrames", "GeometryBasics", "GRIBDatasets", "Makie", "NCDatasets", "Plots", "Proj", "RasterDataSources", "SafeTestsets", "Shapefile", "StableRNGs", "StatsBase", "Test", "ZarrDatasets"] +test = ["Aqua", "ArchGDAL", "CFTime", "CoordinateTransformations", "DataFrames", "GeoDataFrames", "GeometryBasics", "GRIBDatasets", "Makie", "NaturalEarth", "NCDatasets", "Plots", "Proj", "RasterDataSources", "SafeTestsets", "Shapefile", "StableRNGs", "StatsBase", "Test", "ZarrDatasets"] diff --git a/src/Rasters.jl b/src/Rasters.jl index 7480ae53..77680a22 100644 --- a/src/Rasters.jl +++ b/src/Rasters.jl @@ -21,14 +21,17 @@ import Adapt, FillArrays, Flatten, GeoInterface, + GeometryOps, GeometryOpsCore, OffsetArrays, + OrderedCollections, ProgressMeter, Missings, Mmap, RecipesBase, Reexport, Setfield, + SortTileRecursiveTree, Statistics Reexport.@reexport using DimensionalData, GeoFormatTypes @@ -76,6 +79,7 @@ export Extent, extent const DD = DimensionalData const DA = DiskArrays const GI = GeoInterface +const GO = GeometryOps const LA = Lookups # DimensionalData documentation urls @@ -119,6 +123,8 @@ const RasterSeriesOrStack = Union{AbstractRasterSeries,AbstractRasterStack} include("utils.jl") include("skipmissing.jl") +include("geometry_lookup/geometry_lookup.jl") + include("table_ops.jl") include("create.jl") include("read.jl") diff --git a/src/geometry_lookup/geometry_lookup.jl b/src/geometry_lookup/geometry_lookup.jl new file mode 100644 index 00000000..534d3e4e --- /dev/null +++ b/src/geometry_lookup/geometry_lookup.jl @@ -0,0 +1,342 @@ +""" + GeometryLookup(data, dims = (X(), Y()); geometrycolumn = nothing) + +A lookup type for geometry dimensions in vector data cubes. + +`GeometryLookup` provides efficient spatial indexing and lookup for +geometries using an STRtree (Sort-Tile-Recursive tree). + +It is used as the lookup type for geometry dimensions in vector +data cubes, enabling fast spatial queries and operations. + +It spans the dimensions given to it in `dims`, as well as the dimension + it's wrapped in - you would construct a DimArray with a GeometryLookup +like `DimArray(data, Geometry(GeometryLookup(data, dims)))`. +Here, `Geometry` is a dimension - but selectors in X and Y will also work! + +# Examples + +```julia +using Rasters + +using NaturalEarth +import GeometryOps as GO + +# construct the polygon lookup +polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry +polygon_lookup = GeometryLookup(polygons, (X(), Y())) + +# create a DimArray with the polygon lookup +dv = rand(Geometry(polygon_lookup)) + +# select the polygon with the centroid of the 88th polygon +dv[Geometry(Contains(GO.centroid(polygons[88])))] == dv[Geometry(88)] # true +``` +""" +struct GeometryLookup{T,A<:AbstractVector{T},D,M<:GO.Manifold,Tree,CRS} <: DD.Dimensions.Lookup{T, 1} + manifold::M + data::A + tree::Tree + dims::D + crs::CRS +end +function GeometryLookup(data, dims=(X(), Y()); geometrycolumn=nothing, crs=nokw, tree=nokw) + # First, retrieve the geometries - from a table, vector of geometries, etc. + geometries = _get_geometries(data, geometrycolumn) + geometries = Missings.disallowmissing(geometries) + + if isnokw(crs) + crs = GI.crs(data) + if isnothing(crs) + crs = GI.crs(first(geometries)) + end + end + + # Check that the geometries are actually geometries + if any(!GI.isgeometry, geometries) + throw(ArgumentError(""" + The collection passed in to `GeometryLookup` has some elements that are not geometries + (`GI.isgeometry(x) == false` for some `x` in `data`). + """)) + end + # Make sure there are only two dimensions + if length(dims) != 2 + throw(ArgumentError(""" + The `dims` argument to `GeometryLookup` must have two dimensions, but it has $(length(dims)) dimensions (`$(dims)`). + Please make sure that it has only two dimensions, like `(X(), Y())`. + """)) + end + # Build the lookup accelerator tree + tree = if isnokw(tree) + SortTileRecursiveTree.STRtree(geometries) + elseif GO.SpatialTreeInterface.isspatialtree(tree) + if tree isa DataType + tree(geometries) + else + tree + end + elseif isnothing(tree) + nothing + else + throw(ArgumentError(""" + Got an argument for `tree` which is not a valid spatial tree (according to `GeometryOps.SpatialTreeInterface`) + nor `nokw` or `nothing` + + Type is $(typeof(tree)) + """)) + end + # TODO: auto manifold detection and best tree type for that manifold + GeometryLookup(GO.Planar(), geometries, tree, dims, crs) +end + +GeoInterface.crs(l::GeometryLookup) = l.crs +setcrs(l::GeometryLookup, crs) = rebuild(l; crs) + +#= + +## DD methods for the lookup + +Here we define DimensionalData's methods for the lookup. +This is broadly standard except for the `rebuild` method, which is used to update the tree accelerator when the data changes. +=# + +DD.dims(l::GeometryLookup) = l.dims +# This has to return itself +# DD.dims(d::DD.Dimension{<:GeometryLookup}) = dims(val(d)) +DD.order(::GeometryLookup) = Lookups.Unordered() +DD.parent(lookup::GeometryLookup) = lookup.data +# TODO: format for geometry lookup +DD.Dimensions.format(l::GeometryLookup, D::Type, values, axis::AbstractRange) = l + +# Make sure that the tree is rebuilt if the data changes +function DD.rebuild( + lookup::GeometryLookup; + data=lookup.data, tree=nokw, + dims=lookup.dims, crs=nokw, + manifold=nokw, metadata=nokw + ) + # TODO: metadata support for geometry lookup + new_tree = if isnokw(tree) + if data == lookup.data + lookup.tree + elseif isempty(data) + nothing + else + SortTileRecursiveTree.STRtree(data) + end + elseif GO.SpatialTreeInterface.isspatialtree(tree) + if tree isa DataType + tree(data) + else + tree + end + else + SortTileRecursiveTree.STRtree(data) + end + new_crs = if isnokw(crs) + data_crs = GI.crs(data) + if isnothing(data_crs) + lookup.crs + else + data_crs + end + else + crs + end + + new_manifold = isnokw(manifold) ? lookup.manifold : manifold + + return GeometryLookup(new_manifold, Missings.disallowmissing(data), new_tree, dims, new_crs) +end + +# # Bounds - get the bounds of the lookup +# function Lookups.bounds(lookup::GeometryLookup) +# if isempty(lookup.data) +# Extents.Extent(NamedTuple{DD.name.(lookup.dims)}(ntuple(2) do i; (nothing, nothing); end)) +# else +# if isnothing(lookup.tree) +# mapreduce(GI.extent, Extents.union, lookup.data) +# else +# GI.extent(lookup.tree) +# end +# end +# end + +# Return an `Int` or Vector{Bool} +# Base case: got a standard index that can go into getindex on a base Array +Lookups.selectindices(lookup::GeometryLookup, sel::Lookups.StandardIndices) = sel +# other cases: +# - decompose selectors +function Lookups.selectindices(lookup::GeometryLookup, sel::DD.DimTuple) + selectindices(lookup, map(_val_or_nothing, sortdims(sel, dims(lookup)))) +end +function Lookups.selectindices(lookup::GeometryLookup, sel::NamedTuple{K}) where K + dimsel = map(rebuild, map(name2dim, K), values(sel)) + selectindices(lookup, dimsel) +end +function Lookups.selectindices(lookup::GeometryLookup, sel::Tuple) + if (length(sel) == length(dims(lookup))) && all(map(s -> s isa At, sel)) + i = findfirst(x -> all(map(Dimensions._matches, sel, x)), lookup) + isnothing(i) && _coord_not_found_error(sel) + return i + else + return [Dimensions._matches(sel, x) for x in lookup] + end +end +function Lookups.selectindices(lookup::GeometryLookup, sel::Contains) + sel_ext = GI.extent(val(sel)) + potential_candidates = _maybe_get_candidates(lookup, sel_ext) + filter(potential_candidates) do candidate + GO.contains(lookup.data[candidate], val(sel)) + end +end +function Lookups.selectindices(lookup::GeometryLookup, sel::At) + @assert GI.isgeometry(geom) + candidates = _maybe_get_candidates(lookup, GI.extent(val(sel))) + x = findfirst(candiates) do candidate + GO.equal(val(at), candidate) + end + if isnothing(x) + throw(ArgumentError("$sel not found in lookup")) + else + return x + end +end +function Lookups.selectindices(lookup::GeometryLookup, sel::Near) + geom = val(sel) + @assert GI.isgeometry(geom) + # TODO: temporary + @assert GI.trait(geom) isa GI.PointTrait "Only point geometries are supported for the near lookup at this point! We will add more geometry support in the future." + + # Get the nearest geometry + # TODO: this sucks! Use some branch and bound algorithm + # on the spatial tree instead. + # if pointtrait + return findmin(x -> GO.distance(geom, x), lookup.data)[2] + # else + # findmin(x -> GO.distance(GO.GEOS(), geom, x), lookup.data)[2] + # end + # this depends on LibGEOS being installed. + +end +function Lookups.selectindices(lookup::GeometryLookup, sel::Touches) + sel_ext = GI.extent(val(sel)) + potential_candidates = _maybe_get_candidates(lookup, sel_ext) + return filter(potential_candidates) do candidate + GO.intersects(lookup.data[candidate], val(sel)) + end +end +function Lookups.selectindices( + lookup::GeometryLookup, + (xs, ys)::Tuple{Union{<:Touches}, Union{<:Touches}} +) + target_ext = Extents.Extent(X = (first(xs), last(xs)), Y = (first(ys), last(ys))) + potential_candidates = _maybe_get_candidates(lookup, target_ext) + return filter(potential_candidates) do candidate + GO.intersects(lookup.data[candidate], target_ext) + end +end +function Lookups.selectindices( + lookup::GeometryLookup, + (xs, ys)::Tuple{Union{<:DD.IntervalSets.ClosedInterval},Union{<:DD.IntervalSets.ClosedInterval}} +) + target_ext = Extents.Extent(X = extrema(xs), Y = extrema(ys)) + potential_candidates = _maybe_get_candidates(lookup, target_ext) + filter(potential_candidates) do candidate + GO.covers(target_ext, lookup.data[candidate]) + end +end +function Lookups.selectindices( + lookup::GeometryLookup, + (x, y)::Tuple{Union{<:At,<:Contains}, Union{<:At,<:Contains}} +) + xval, yval = val(x), val(y) + lookup_ext = Lookups.bounds(lookup) + + # This is a specialized implementation to save on lookups + if !(lookup_ext.X[1] <= xval <= lookup_ext.X[2] && lookup_ext.Y[1] <= yval <= lookup_ext.Y[2]) + return Int[] + else # within extent + potential_candidates = GO.SpatialTreeInterface.query(lookup.tree, (xval, yval)) + isempty(potential_candidates) && return Int[] + # If both selectors are At(), return a single index (first match) for speed and clarity + if x isa At && y isa At + for candidate in potential_candidates + if GO.contains(lookup.data[candidate], (xval, yval)) + return candidate + end + end + # No match found within bounds + throw(ArgumentError("Point ($xval, $yval) not found in lookup")) + else + # For Contains selectors, return all matching indices + filter(potential_candidates) do candidate + GO.contains(lookup.data[candidate], (xval, yval)) + end + end + end +end + +for fname in (:equals, :intersects, + :contains, :within, :covers, + :coveredby, :touches) + @eval begin + function Lookups.selectindices(lookup::GeometryLookup, sel::Where{Base.Fix2{typeof(GO.$fname)}}) + sel_ext = GI.extent(val(sel).x) + potential_candidates = _maybe_get_candidates(lookup, sel_ext) + f = val(sel) + return filter(potential_candidates) do idx + f(lookup.data[idx]) + end + end + end +end +# Disjoint needs a specialized implementation, which will look at intersects instead. +function Lookups.selectindices(lookup::GeometryLookup, sel::Where{Base.Fix2{typeof(GO.disjoint)}}) + sel_ext = GI.extent(val(sel).x) + potential_candidates = _maybe_get_candidates(lookup, sel_ext, Extents.intersects) + f = val(sel) + actual_intersections = filter(potential_candidates) do idx + f(lookup.data[idx]) + end + return setdiff(1:length(lookup.data), actual_intersections) +end + +@inline Lookups.reducelookup(l::GeometryLookup) = NoLookup(OneTo(1)) + +function Lookups.show_properties(io::IO, mime, lookup::GeometryLookup) + print(io, " ") + show(IOContext(io, :inset => "", :dimcolor => 244), mime, DD.basedims(lookup)) +end + +# Dimension methods + +@inline _reducedims(lookup::GeometryLookup, dim::DD.Dimension) = + rebuild(dim, [map(x -> zero(x), dim.val[1])]) + +function DD.format(dim::DD.Dimension{<:GeometryLookup}, axis::AbstractRange) + checkaxis(dim, axis) + return dim +end + +# Local functions +_val_or_nothing(::Nothing) = nothing +_val_or_nothing(d::DD.Dimension) = val(d) + +# Get the candidates for the selector extent. +# If the selector extent is disjoint from the tree rootnode extent, +# you should raise an error. We should have an error type that can be +# plotted etc. to allow debugging and understanding. +function _maybe_get_candidates(lookup::GeometryLookup, selector_extent, operation::O) where O + tree = lookup.tree + isnothing(tree) && return 1:length(lookup) + Extents.disjoint(GI.extent(tree), selector_extent) && return Int[] + potential_candidates = GO.SpatialTreeInterface.query( + tree, + Base.Fix1(operation, selector_extent) + ) + isempty(potential_candidates) && return Int[] + return potential_candidates +end + +_maybe_get_candidates(lookup::GeometryLookup, selector_extent) = _maybe_get_candidates(lookup, selector_extent, Extents.intersects) \ No newline at end of file diff --git a/test/geometry_lookup.jl b/test/geometry_lookup.jl new file mode 100644 index 00000000..2e55e1f2 --- /dev/null +++ b/test/geometry_lookup.jl @@ -0,0 +1,51 @@ +using NaturalEarth +using Test + +using Rasters, DimensionalData +using Rasters.Lookups +import Proj +import GeometryOps as GO, GeoInterface as GI +using Extents + +import NCDatasets +import DimensionalData as DD + +@testset "construction" begin + # fetch land polygons from Natural Earth + country_polygons = NaturalEarth.naturalearth("admin_0_countries", 110).geometry + # create a DimVector from the land polygons + gl = GeometryLookup(country_polygons) + @test crs(gl) == EPSG(4326) + @test all(GO.equals, zip(val(gl), country_polygons)) +end + +@testset "Interacting with the geometry lookup" begin + # fetch land polygons from Natural Earth + country_fc = NaturalEarth.naturalearth("admin_0_countries", 110) + country_polygons = country_fc.geometry + # create a DimVector from the land polygons + gl = GeometryLookup(country_polygons) + ras = rand(Dim{:Geometry}(gl)) + @testset "indexing" begin + # TODO: fix this to only return a single index, if necessary... + @test only(ras[Geometry = (X(At(-103)), Y(At(44)))]) == ras[findfirst(==("USA"), country_fc.ADM0_A3)] + end + + @testset "indexing with geometry" begin + for fname in (:equals, :intersects, + :contains, :within, :covers, + :coveredby, :touches, :disjoint) + @testset "Fix2 with GeometryOps $fname" begin + @test isempty(setdiff( + Rasters.DD.dims2indices(ras, getproperty(GO, fname)(gl[1])), + filter(axes(gl, 1)) do idx + getproperty(GO, fname)(gl[idx], gl[1]) + end + )) + end + end + @test ras[Geometry=Where(GO.contains(gl[1]))] == ras[Geometry=1] + @test ras[Geometry=Where(GO.equals(gl[1]))] == ras[Geometry=1] + @test ras[Geometry=Where(GO.disjoint(gl[1]))] == ras[Geometry=2:DD.End()] + end +end \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 7d0c8505..5ec96964 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -20,6 +20,7 @@ end @time @safetestset "utils" begin include("utils.jl") end @time @safetestset "set" begin include("set.jl") end @time @safetestset "adapt" begin include("adapt.jl") end +@time @safetestset "geometrylookups" begin include("geometry_lookup.jl") end @time @safetestset "methods" begin include("methods.jl") end @time @safetestset "aggregate" begin include("aggregate.jl") end @time @safetestset "mosaic" begin include("mosaic.jl") end