Skip to content

Commit 83f0e1e

Browse files
authored
fix: add option to create collection if missing (#152)
1 parent 46c37a0 commit 83f0e1e

File tree

3 files changed

+228
-0
lines changed

3 files changed

+228
-0
lines changed

lib/stac-item-loader/index.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,10 @@ export interface StacItemLoaderProps {
134134
*
135135
* These will be merged with the default environment variables including
136136
* PGSTAC_SECRET_ARN. Use this for custom configuration or debugging flags.
137+
*
138+
* If you want to enable the option to upload a boilerplate collection record
139+
* in the event that the collection record does not yet exist for an item that
140+
* is set to be loaded, set the variable `"CREATE_COLLECTIONS_IF_MISSING": "TRUE"`.
137141
*/
138142
readonly environment?: { [key: string]: string };
139143

lib/stac-item-loader/runtime/src/stac_item_loader/handler.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
from pydantic import ValidationError
1919
from pypgstac.db import PgstacDB
2020
from pypgstac.load import Loader, Methods
21+
from stac_pydantic.collection import Collection, Extent, SpatialExtent, TimeInterval
2122
from stac_pydantic.item import Item
23+
from stac_pydantic.links import Link, Links
2224

2325
if TYPE_CHECKING:
2426
from aws_lambda_typing.context import Context
@@ -212,6 +214,31 @@ def handler(
212214
try:
213215
with PgstacDB(dsn=pgstac_dsn) as db:
214216
loader = Loader(db=db)
217+
if os.getenv("CREATE_COLLECTIONS_IF_MISSING"):
218+
collection_exists = db.query_one(
219+
f"SELECT count(*) as count from collections where id = '{collection_id}'"
220+
)
221+
if not collection_exists:
222+
logger.info(
223+
f"[{collection_id}] loading collection into database because it is missing."
224+
)
225+
collection = Collection(
226+
id=collection_id,
227+
description=collection_id,
228+
links=Links([Link(href="placeholder", rel="self")]),
229+
type="Collection",
230+
license="proprietary",
231+
extent=Extent(
232+
spatial=SpatialExtent(bbox=[[-180, -90, 180, 90]]),
233+
temporal=TimeInterval(interval=[[None, None]]),
234+
),
235+
stac_version=items[0]["stac_version"],
236+
)
237+
loader.load_collections(
238+
[collection.model_dump()], # type: ignore
239+
insert_mode=Methods.upsert,
240+
)
241+
215242
logger.info(f"[{collection_id}] loading items into database.")
216243
loader.load_items(
217244
file=items, # type: ignore

lib/stac-item-loader/runtime/tests/test_item_load_handler.py

Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
count_collection_items,
1111
get_all_collection_items,
1212
)
13+
from pypgstac.db import PgstacDB
1314
from stac_item_loader.handler import get_pgstac_dsn, handler
1415

1516

@@ -35,6 +36,7 @@ def create_valid_stac_item(collection_id=TEST_COLLECTION_IDS[0], item_id="test-i
3536
},
3637
"assets": {},
3738
"links": [],
39+
"stac_version": "1.1.0",
3840
}
3941

4042

@@ -781,3 +783,198 @@ def test_process_s3_event_with_multiple_records():
781783
# Should raise ValueError
782784
with pytest.raises(ValueError, match="more than one S3 event record"):
783785
process_s3_event(message_str)
786+
787+
788+
@patch.dict(os.environ, {"CREATE_COLLECTIONS_IF_MISSING": "true"})
789+
def test_handler_creates_missing_collection(
790+
mock_aws_context, mock_pgstac_dsn, database_url
791+
):
792+
"""Test that collections are created when CREATE_COLLECTIONS_IF_MISSING is set and collection doesn't exist"""
793+
missing_collection_id = "missing-test-collection"
794+
item_id = "test-item-missing-collection"
795+
796+
with PgstacDB(dsn=database_url) as db:
797+
result = db.query_one(
798+
f"SELECT count(*) as count from collections where id = '{missing_collection_id}'"
799+
)
800+
assert result == 0, "Collection should not exist initially"
801+
802+
valid_item = create_valid_stac_item(
803+
collection_id=missing_collection_id, item_id=item_id
804+
)
805+
806+
event = {"Records": [create_sqs_record(valid_item, message_id="test-message-1")]}
807+
result = handler(event, mock_aws_context)
808+
809+
assert result is None
810+
811+
with PgstacDB(dsn=database_url) as db:
812+
result = db.query_one(
813+
f"SELECT count(*) as count from collections where id = '{missing_collection_id}'"
814+
)
815+
assert result == 1, "Collection should have been created"
816+
817+
assert check_item_exists(
818+
database_url, missing_collection_id, item_id
819+
), "Item was not found in the database"
820+
821+
822+
def test_handler_does_not_create_collection_without_env_var(
823+
mock_aws_context, mock_pgstac_dsn, database_url
824+
):
825+
"""Test that collections are NOT created when CREATE_COLLECTIONS_IF_MISSING env var is not set"""
826+
if "CREATE_COLLECTIONS_IF_MISSING" in os.environ:
827+
del os.environ["CREATE_COLLECTIONS_IF_MISSING"]
828+
829+
missing_collection_id = "missing-test-collection-2"
830+
item_id = "test-item-missing-collection-2"
831+
832+
with PgstacDB(dsn=database_url) as db:
833+
result = db.query_one(
834+
f"SELECT count(*) as count from collections where id = '{missing_collection_id}'"
835+
)
836+
assert result == 0, "Collection should not exist initially"
837+
838+
valid_item = create_valid_stac_item(
839+
collection_id=missing_collection_id, item_id=item_id
840+
)
841+
842+
event = {"Records": [create_sqs_record(valid_item, message_id="test-message-1")]}
843+
result = handler(event, mock_aws_context)
844+
845+
assert result is not None
846+
assert "batchItemFailures" in result
847+
assert any(
848+
failure["itemIdentifier"] == "test-message-1"
849+
for failure in result["batchItemFailures"]
850+
)
851+
852+
with PgstacDB(dsn=database_url) as db:
853+
result = db.query_one(
854+
f"SELECT count(*) as count from collections where id = '{missing_collection_id}'"
855+
)
856+
assert result == 0, "Collection should not have been created"
857+
858+
assert not check_item_exists(
859+
database_url, missing_collection_id, item_id
860+
), "Item should not have been added to the database"
861+
862+
863+
@patch.dict(os.environ, {"CREATE_COLLECTIONS_IF_MISSING": "true"})
864+
def test_handler_does_not_recreate_existing_collection(
865+
mock_aws_context, mock_pgstac_dsn, database_url
866+
):
867+
"""Test that existing collections are not recreated when they already exist"""
868+
existing_collection_id = TEST_COLLECTION_IDS[0]
869+
item_id = "test-item-existing-collection"
870+
871+
with PgstacDB(dsn=database_url) as db:
872+
original_collection = db.query_one(
873+
f"SELECT * from collections where id = '{existing_collection_id}'"
874+
)
875+
assert original_collection is not None, "Test collection should exist"
876+
877+
valid_item = create_valid_stac_item(
878+
collection_id=existing_collection_id, item_id=item_id
879+
)
880+
881+
event = {"Records": [create_sqs_record(valid_item, message_id="test-message-1")]}
882+
result = handler(event, mock_aws_context)
883+
884+
assert result is None
885+
886+
with PgstacDB(dsn=database_url) as db:
887+
current_collection = db.query_one(
888+
f"SELECT * from collections where id = '{existing_collection_id}'"
889+
)
890+
assert (
891+
current_collection == original_collection
892+
), "Existing collection should not have been modified"
893+
894+
assert check_item_exists(
895+
database_url, existing_collection_id, item_id
896+
), "Item was not found in the database"
897+
898+
899+
@patch.dict(os.environ, {"CREATE_COLLECTIONS_IF_MISSING": "true"})
900+
def test_handler_creates_collection_with_multiple_items(
901+
mock_aws_context, mock_pgstac_dsn, database_url
902+
):
903+
"""Test that collection creation works with multiple items from the same missing collection"""
904+
missing_collection_id = "missing-test-collection-multi"
905+
item_ids = ["test-item-1", "test-item-2", "test-item-3"]
906+
907+
with PgstacDB(dsn=database_url) as db:
908+
result = db.query_one(
909+
f"SELECT count(*) as count from collections where id = '{missing_collection_id}'"
910+
)
911+
assert result == 0, "Collection should not exist initially"
912+
913+
items = [
914+
create_valid_stac_item(collection_id=missing_collection_id, item_id=item_id)
915+
for item_id in item_ids
916+
]
917+
918+
event = {
919+
"Records": [
920+
create_sqs_record(item, message_id=f"test-message-{i}")
921+
for i, item in enumerate(items)
922+
]
923+
}
924+
925+
result = handler(event, mock_aws_context)
926+
927+
assert result is None
928+
929+
with PgstacDB(dsn=database_url) as db:
930+
result = db.query_one(
931+
f"SELECT count(*) as count from collections where id = '{missing_collection_id}'"
932+
)
933+
assert result == 1, "Collection should have been created exactly once"
934+
935+
for item_id in item_ids:
936+
assert check_item_exists(
937+
database_url, missing_collection_id, item_id
938+
), f"Item {item_id} was not found in the database"
939+
940+
941+
@patch.dict(os.environ, {"CREATE_COLLECTIONS_IF_MISSING": "true"})
942+
@patch("pypgstac.load.Loader.load_collections")
943+
def test_handler_collection_creation_failure(
944+
mock_load_collections, mock_aws_context, mock_pgstac_dsn, database_url
945+
):
946+
"""Test collection creation failure handling"""
947+
mock_load_collections.side_effect = Exception("Failed to create collection")
948+
949+
missing_collection_id = "missing-test-collection-fail"
950+
item_id = "test-item-collection-fail"
951+
952+
with PgstacDB(dsn=database_url) as db:
953+
result = db.query_one(
954+
f"SELECT count(*) as count from collections where id = '{missing_collection_id}'"
955+
)
956+
assert result == 0, "Collection should not exist initially"
957+
958+
valid_item = create_valid_stac_item(
959+
collection_id=missing_collection_id, item_id=item_id
960+
)
961+
962+
event = {"Records": [create_sqs_record(valid_item, message_id="test-message-1")]}
963+
result = handler(event, mock_aws_context)
964+
965+
assert result is not None
966+
assert "batchItemFailures" in result
967+
assert any(
968+
failure["itemIdentifier"] == "test-message-1"
969+
for failure in result["batchItemFailures"]
970+
)
971+
972+
with PgstacDB(dsn=database_url) as db:
973+
result = db.query_one(
974+
f"SELECT count(*) as count from collections where id = '{missing_collection_id}'"
975+
)
976+
assert result == 0, "Collection should not have been created due to failure"
977+
978+
assert not check_item_exists(
979+
database_url, missing_collection_id, item_id
980+
), "Item should not have been added to the database"

0 commit comments

Comments
 (0)