diff --git a/CHANGELOG.md b/CHANGELOG.md index e79dca0ea..b397fc3eb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Added `USE_DATETIME` environment variable to configure datetime search behavior in SFEOS. [#452](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/452) - GET `/collections` collection search sort extension ex. `/collections?sortby=+id`. [#456](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/456) +- GET `/collections` collection search fields extension ex. `/collections?fields=id,title`. [#465](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/465) +- Improved error messages for sorting on unsortable fields in collection search, including guidance on how to make fields sortable. [#465](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/465) +- Added field alias for `temporal` to enable easier sorting by temporal extent, alongside `extent.temporal.interval`. [#465](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/465) ### Changed diff --git a/README.md b/README.md index 578a440a4..929d17c6a 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,10 @@ SFEOS (stac-fastapi-elasticsearch-opensearch) is a high-performance, scalable AP - **Scale to millions of geospatial assets** with fast search performance through optimized spatial indexing and query capabilities - **Support OGC-compliant filtering** including spatial operations (intersects, contains, etc.) and temporal queries - **Perform geospatial aggregations** to analyze data distribution across space and time +- **Discover collections efficiently** with support for sorting and field selection This implementation builds on the STAC-FastAPI framework, providing a production-ready solution specifically optimized for Elasticsearch and OpenSearch databases. 
It's ideal for organizations managing large geospatial data catalogs who need efficient discovery and access capabilities through standardized APIs. - - ## Common Deployment Patterns stac-fastapi-elasticsearch-opensearch can be deployed in several ways depending on your needs: @@ -72,6 +71,7 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI - [Common Deployment Patterns](#common-deployment-patterns) - [Technologies](#technologies) - [Table of Contents](#table-of-contents) + - [Collection Search Extensions](#collection-search-extensions) - [Documentation \& Resources](#documentation--resources) - [Package Structure](#package-structure) - [Examples](#examples) @@ -113,6 +113,30 @@ This project is built on the following technologies: STAC, stac-fastapi, FastAPI - [Gitter Chat](https://app.gitter.im/#/room/#stac-fastapi-elasticsearch_community:gitter.im) - For real-time discussions - [GitHub Discussions](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/discussions) - For longer-form questions and answers +## Collection Search Extensions + +SFEOS implements extended capabilities for the `/collections` endpoint, allowing for more powerful collection discovery: + +- **Sorting**: Sort collections by sortable fields using the `sortby` parameter + - Example: `/collections?sortby=+id` (ascending sort by ID) + - Example: `/collections?sortby=-id` (descending sort by ID) + - Example: `/collections?sortby=-temporal` (descending sort by temporal extent) + +- **Field Selection**: Request only specific fields to be returned using the `fields` parameter + - Example: `/collections?fields=id,title,description` + - This helps reduce payload size when only certain fields are needed + +These extensions make it easier to build user interfaces that display and navigate through collections efficiently. + +> **Note**: Sorting is only available on fields that are indexed for sorting in Elasticsearch/OpenSearch. 
With the default mappings, you can sort on: +> - `id` (keyword field) +> - `extent.temporal.interval` (date field) +> - `temporal` (alias for `extent.temporal.interval`) +> +> Text fields like `title` and `description` are not sortable by default as they use text analysis for better search capabilities. Attempting to sort on these fields will result in a user-friendly error message explaining which fields are sortable and how to make additional fields sortable by updating the mappings. +> +> **Important**: Adding keyword fields to make text fields sortable can significantly increase the index size, especially for large text fields. Consider the storage implications when deciding which fields to make sortable. + ## Package Structure This project is organized into several packages, each with a specific purpose: diff --git a/stac_fastapi/core/stac_fastapi/core/core.py b/stac_fastapi/core/stac_fastapi/core/core.py index a38bdddba..4f35ed413 100644 --- a/stac_fastapi/core/stac_fastapi/core/core.py +++ b/stac_fastapi/core/stac_fastapi/core/core.py @@ -225,11 +225,13 @@ async def landing_page(self, **kwargs) -> stac_types.LandingPage: return landing_page async def all_collections( - self, sortby: Optional[str] = None, **kwargs + self, fields: Optional[List[str]] = None, sortby: Optional[str] = None, **kwargs ) -> stac_types.Collections: """Read all collections from the database. Args: + fields (Optional[List[str]]): Fields to include or exclude from the results. + sortby (Optional[str]): Sorting options for the results. **kwargs: Keyword arguments from the request. 
Returns: @@ -240,6 +242,15 @@ async def all_collections( limit = int(request.query_params.get("limit", os.getenv("STAC_ITEM_LIMIT", 10))) token = request.query_params.get("token") + # Process fields parameter for filtering collection properties + includes, excludes = set(), set() + if fields and self.extension_is_enabled("FieldsExtension"): + for field in fields: + if field[0] == "-": + excludes.add(field[1:]) + else: + includes.add(field[1:] if field[0] in "+ " else field) + sort = None if sortby: parsed_sort = [] @@ -259,6 +270,15 @@ async def all_collections( token=token, limit=limit, request=request, sort=sort ) + # Apply field filtering if fields parameter was provided + if fields and self.extension_is_enabled("FieldsExtension"): + filtered_collections = [ + filter_fields(collection, includes, excludes) + for collection in collections + ] + else: + filtered_collections = collections + links = [ {"rel": Relations.root.value, "type": MimeTypes.json, "href": base_url}, {"rel": Relations.parent.value, "type": MimeTypes.json, "href": base_url}, @@ -273,7 +293,7 @@ async def all_collections( next_link = PagingLinks(next=next_token, request=request).link_next() links.append(next_link) - return stac_types.Collections(collections=collections, links=links) + return stac_types.Collections(collections=filtered_collections, links=links) async def get_collection( self, collection_id: str, **kwargs diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py index ea26c0229..67600072a 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/app.py @@ -120,7 +120,7 @@ collection_search_extensions = [ # QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]), SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]), - # 
FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]), + FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]), # CollectionSearchFilterExtension( # conformance_classes=[FilterConformanceClasses.COLLECTIONS] # ), diff --git a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py index df1e816db..35cd8d9e2 100644 --- a/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py +++ b/stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py @@ -186,13 +186,28 @@ async def get_all_collections( Returns: A tuple of (collections, next pagination token if any). + + Raises: + HTTPException: If sorting is requested on a field that is not sortable. """ + # Define sortable fields based on the ES_COLLECTIONS_MAPPINGS + sortable_fields = ["id", "extent.temporal.interval", "temporal"] + + # Format the sort parameter formatted_sort = [] if sort: for item in sort: field = item.get("field") direction = item.get("direction", "asc") if field: + # Validate that the field is sortable + if field not in sortable_fields: + raise HTTPException( + status_code=400, + detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. " + + "Text fields are not sortable by default in Elasticsearch. " + + "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. 
", + ) formatted_sort.append({field: {"order": direction}}) # Always include id as a secondary sort to ensure consistent pagination if not any("id" in item for item in formatted_sort): diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py index 789cb7281..7d9f5d916 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/app.py @@ -120,7 +120,7 @@ collection_search_extensions = [ # QueryExtension(conformance_classes=[QueryConformanceClasses.COLLECTIONS]), SortExtension(conformance_classes=[SortConformanceClasses.COLLECTIONS]), - # FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]), + FieldsExtension(conformance_classes=[FieldsConformanceClasses.COLLECTIONS]), # CollectionSearchFilterExtension( # conformance_classes=[FilterConformanceClasses.COLLECTIONS] # ), diff --git a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py index 4253a00a7..94a95b32a 100644 --- a/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py +++ b/stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py @@ -170,13 +170,28 @@ async def get_all_collections( Returns: A tuple of (collections, next pagination token if any). + + Raises: + HTTPException: If sorting is requested on a field that is not sortable. """ + # Define sortable fields based on the ES_COLLECTIONS_MAPPINGS + sortable_fields = ["id", "extent.temporal.interval", "temporal"] + + # Format the sort parameter formatted_sort = [] if sort: for item in sort: field = item.get("field") direction = item.get("direction", "asc") if field: + # Validate that the field is sortable + if field not in sortable_fields: + raise HTTPException( + status_code=400, + detail=f"Field '{field}' is not sortable. Sortable fields are: {', '.join(sortable_fields)}. 
" + + "Text fields are not sortable by default in OpenSearch. " + + "To make a field sortable, update the mapping to use 'keyword' type or add a '.keyword' subfield. ", + ) formatted_sort.append({field: {"order": direction}}) # Always include id as a secondary sort to ensure consistent pagination if not any("id" in item for item in formatted_sort): diff --git a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py index 17cdd1ea0..df002dc5c 100644 --- a/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py +++ b/stac_fastapi/sfeos_helpers/stac_fastapi/sfeos_helpers/mappings.py @@ -165,6 +165,8 @@ class Geometry(Protocol): # noqa "providers": {"type": "object", "enabled": False}, "links": {"type": "object", "enabled": False}, "item_assets": {"type": "object", "enabled": get_bool_env("STAC_INDEX_ASSETS")}, + # Field alias to allow sorting on 'temporal' (points to extent.temporal.interval) + "temporal": {"type": "alias", "path": "extent.temporal.interval"}, }, } diff --git a/stac_fastapi/tests/api/test_api_search_collections.py b/stac_fastapi/tests/api/test_api_search_collections.py index ed0dfc1bb..ffd84831d 100644 --- a/stac_fastapi/tests/api/test_api_search_collections.py +++ b/stac_fastapi/tests/api/test_api_search_collections.py @@ -77,3 +77,78 @@ async def test_collections_sort_id_desc(app_client, txn_client, load_test_data): assert len(test_collections) == len(collection_ids) for i, expected_id in enumerate(sorted_ids): assert test_collections[i]["id"] == expected_id + + +@pytest.mark.asyncio +async def test_collections_fields(app_client, txn_client, load_test_data): + """Verify GET /collections honors the fields parameter.""" + # Create multiple collections with different ids + base_collection = load_test_data("test_collection.json") + + # Create collections with ids in a specific order to test fields + # Use unique prefixes to avoid conflicts between tests + 
test_prefix = f"fields-{uuid.uuid4().hex[:8]}" + collection_ids = [f"{test_prefix}-a", f"{test_prefix}-b", f"{test_prefix}-c"] + + for i, coll_id in enumerate(collection_ids): + test_collection = base_collection.copy() + test_collection["id"] = coll_id + test_collection["title"] = f"Test Collection {i}" + test_collection["description"] = f"Description for collection {i}" + await create_collection(txn_client, test_collection) + + # Test include fields parameter + resp = await app_client.get( + "/collections", + params=[("fields", "id"), ("fields", "title")], + ) + assert resp.status_code == 200 + resp_json = resp.json() + + # Check if collections exist in the response + assert "collections" in resp_json, "No collections in response" + + # Filter collections to only include the ones we created for this test + test_collections = [] + for c in resp_json["collections"]: + if "id" in c and c["id"].startswith(test_prefix): + test_collections.append(c) + + # Filter collections to only include the ones we created for this test + test_collections = [] + for c in resp_json["collections"]: + if "id" in c and c["id"].startswith(test_prefix): + test_collections.append(c) + + # Collections should only have id and title fields + for collection in test_collections: + assert "id" in collection + assert "title" in collection + assert "description" not in collection + assert "links" in collection # links are always included + + # Test exclude fields parameter + resp = await app_client.get( + "/collections", + params=[("fields", "-description")], + ) + assert resp.status_code == 200 + resp_json = resp.json() + + # Check if collections exist in the response + assert ( + "collections" in resp_json + ), "No collections in response for exclude fields test" + + # Filter collections to only include the ones we created for this test + test_collections = [] + for c in resp_json["collections"]: + if "id" in c and c["id"].startswith(test_prefix): + test_collections.append(c) + + # Collections 
should have all fields except description + for collection in test_collections: + assert "id" in collection + assert "title" in collection + assert "description" not in collection + assert "links" in collection