From 5a8e899d53034056df264daf5d89831dfc258718 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 9 Jan 2025 10:29:40 +0100 Subject: [PATCH 01/26] mogpr to openeo_udp --- openeo_udp/mogpr/README.md | 51 +++++++++++++++++++++++ openeo_udp/mogpr/generate.py | 53 ++++++++++++++++++++++++ openeo_udp/mogpr/set_path.py | 79 ++++++++++++++++++++++++++++++++++++ 3 files changed, 183 insertions(+) create mode 100644 openeo_udp/mogpr/README.md create mode 100644 openeo_udp/mogpr/generate.py create mode 100644 openeo_udp/mogpr/set_path.py diff --git a/openeo_udp/mogpr/README.md b/openeo_udp/mogpr/README.md new file mode 100644 index 00000000..2f2b14c9 --- /dev/null +++ b/openeo_udp/mogpr/README.md @@ -0,0 +1,51 @@ +# Multi output gaussian process regression + +## Description + +Compute an integrated timeseries based on multiple inputs. +For instance, combine Sentinel-2 NDVI with Sentinel-1 RVI into one integrated NDVI. + +## Limitations + +The spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km). + +## Configuration & Resource Usage + +Run configurations for different ROI/TOI with memory requirements and estimated run durations. + +### Synchronous calls + +TODO: Replace with actual measurements!!! + +| Spatial extent | Run duration | +|----------------|--------------| +| 100 m x 100 m | 1 minute | +| 500m x 500 m | 1 minute | +| 1 km x 1 km | 1 minute | +| 5 km x 5 km | 2 minutes | +| 10 km x 10 km | 3 minutes | +| 50 km x 50 km | 9 minutes | + +The maximum duration of a synchronous run is 15 minutes. +For long running computations, you can use batch jobs. + +### Batch jobs + +TODO: Replace with actual measurements!!! 
+ +| Spatial extent | Temporal extent | Executor memory | Run duration | +|-----------------|-----------------|-----------------|--------------| +| 100 m x 100 m | 1 month | default | 7 minutes | +| 500 m x 100 m | 1 month | default | 7 minutes | +| 1 km x 1 km | 1 month | default | 7 minutes | +| 5 km x 5 km | 1 month | default | 10 minutes | +| 10 km x 10 km | 1 month | default | 11 minutes | +| 50 km x 50 km | 1 month | 6 GB | 20 minutes | +| 100 km x 100 km | 1 month | 7 GB | 34 minutes | +| 100m x 100 m | 7 months | default | 10 minutes | +| 500 m x 500 m | 7 months | default | 10 minutes | +| 1 km x 1 km | 7 months | default | 14 minutes | +| 5 km x 5 km | 7 months | default | 14 minutes | +| 10 km x 10 km | 7 months | default | 19 minutes | +| 50 km x 50 km | 7 months | 6 GB | 45 minutes | +| 100 km x 100 km | 7 months | 8 GB | 65 minutes | diff --git a/openeo_udp/mogpr/generate.py b/openeo_udp/mogpr/generate.py new file mode 100644 index 00000000..a5edbae2 --- /dev/null +++ b/openeo_udp/mogpr/generate.py @@ -0,0 +1,53 @@ +import json +from pathlib import Path +from set_path import load_set_path +from typing import Union + +import openeo +from openeo import DataCube +from openeo.api.process import Parameter +from openeo.processes import ProcessBuilder, apply_neighborhood +from openeo.rest.udp import build_process_dict + +from fusets.openeo import load_mogpr_udf +from fusets.openeo.services.publish_mogpr import NEIGHBORHOOD_SIZE + +def get_mogpr( + input_cube: Union[DataCube, Parameter], +) -> ProcessBuilder: + return apply_neighborhood(input_cube, + lambda data: data.run_udf(udf=load_set_path()+"\n"+load_mogpr_udf(), runtime='Python', context=dict()), + size=[ + {'dimension': 'x', 'value': NEIGHBORHOOD_SIZE, 'unit': 'px'}, + {'dimension': 'y', 'value': NEIGHBORHOOD_SIZE, 'unit': 'px'} + ], overlap=[]) + + +def generate() -> dict: + connection = openeo.connect("openeofed.dataspace.copernicus.eu") + + # define parameters + input_cube = Parameter.datacube( + 
name="input_raster_cube", + description="Raster cube for which to calculate the peaks and valleys" + ) + + mogpr = get_mogpr( + input_cube=input_cube, + ) + + return build_process_dict( + process_graph=mogpr, + process_id="mogpr", + summary="Integrates timeseries in data cube using multi-output gaussian process regression", + description=(Path(__file__).parent / "README.md").read_text(), + parameters=[input_cube], + returns=None, # TODO + categories=None, # TODO + ) + + +if __name__ == "__main__": + # save the generated process to a file + with open(Path(__file__).parent / "mogpr.json", "w") as f: + json.dump(generate(), f, indent=2) diff --git a/openeo_udp/mogpr/set_path.py b/openeo_udp/mogpr/set_path.py new file mode 100644 index 00000000..686d48b3 --- /dev/null +++ b/openeo_udp/mogpr/set_path.py @@ -0,0 +1,79 @@ +import os +import sys +import zipfile +import requests +import functools +from typing import Union +from pathlib import Path + +from openeo.udf import inspect + + +# Example constants for demonstration +DEPENDENCIES_DIR1 = 'venv' +DEPENDENCIES_DIR2 = 'venv_static' + +DEPENDENCIES_URL1 = "https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip" +DEPENDENCIES_URL2 = "https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip" + + +def download_file(url, path): + """ + Downloads a file from the given URL to the specified path. + """ + response = requests.get(url, stream=True) + with open(path, "wb") as file: + file.write(response.content) + + +def extract_zip(zip_path, extract_to): + """ + Extracts a zip file from zip_path to the specified extract_to directory. + """ + with zipfile.ZipFile(zip_path, "r") as zip_ref: + zip_ref.extractall(extract_to) + os.remove(zip_path) # Clean up the zip file after extraction + + +def add_directory_to_sys_path(directory): + """ + Adds a directory to the Python sys.path if it's not already present. 
+ """ + if directory not in sys.path: + sys.path.insert(0, directory) + +@functools.lru_cache(maxsize=5) +def setup_dependencies(dependencies_url,DEPENDENCIES_DIR): + """ + Main function to set up the dependencies by downloading, extracting, + and adding necessary directories to sys.path. + """ + + inspect(message="Create directories") + # Ensure base directories exist + os.makedirs(DEPENDENCIES_DIR, exist_ok=True) + + # Download and extract dependencies if not already present + if not os.listdir(DEPENDENCIES_DIR): + + inspect(message="Extract dependencies") + zip_path = os.path.join(DEPENDENCIES_DIR, "temp.zip") + download_file(dependencies_url, zip_path) + extract_zip(zip_path, DEPENDENCIES_DIR) + + # Add the extracted dependencies directory to sys.path + add_directory_to_sys_path(DEPENDENCIES_DIR) + inspect(message="Added to the sys path") + +setup_dependencies(DEPENDENCIES_URL1, DEPENDENCIES_DIR1) +setup_dependencies(DEPENDENCIES_URL2, DEPENDENCIES_DIR2) + + +def load_set_path() -> str: + """ + loads path setup functions + @return: + """ + import os + + return Path(os.path.realpath(__file__)).read_text() \ No newline at end of file From f13121c3b439065b66e0b5e5710c76edcd302ce4 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 9 Jan 2025 11:11:07 +0100 Subject: [PATCH 02/26] changed process id --- openeo_udp/mogpr/generate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/openeo_udp/mogpr/generate.py b/openeo_udp/mogpr/generate.py index a5edbae2..a852a91e 100644 --- a/openeo_udp/mogpr/generate.py +++ b/openeo_udp/mogpr/generate.py @@ -38,7 +38,7 @@ def generate() -> dict: return build_process_dict( process_graph=mogpr, - process_id="mogpr", + process_id="fusets_mogpr", summary="Integrates timeseries in data cube using multi-output gaussian process regression", description=(Path(__file__).parent / "README.md").read_text(), parameters=[input_cube], From 6523e238278c04073c1627e301f2057fd3062e96 
Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 9 Jan 2025 11:24:26 +0100 Subject: [PATCH 03/26] moved dir due to sys path issue --- openeo_udp/{mogpr => fusets_mogpr}/README.md | 0 openeo_udp/fusets_mogpr/fusets_mogpr.json | 55 +++++++++++++++++++ .../{mogpr => fusets_mogpr}/generate.py | 2 +- .../{mogpr => fusets_mogpr}/set_path.py | 0 4 files changed, 56 insertions(+), 1 deletion(-) rename openeo_udp/{mogpr => fusets_mogpr}/README.md (100%) create mode 100644 openeo_udp/fusets_mogpr/fusets_mogpr.json rename openeo_udp/{mogpr => fusets_mogpr}/generate.py (96%) rename openeo_udp/{mogpr => fusets_mogpr}/set_path.py (100%) diff --git a/openeo_udp/mogpr/README.md b/openeo_udp/fusets_mogpr/README.md similarity index 100% rename from openeo_udp/mogpr/README.md rename to openeo_udp/fusets_mogpr/README.md diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json new file mode 100644 index 00000000..ac1c8374 --- /dev/null +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -0,0 +1,55 @@ +{ + "process_graph": { + "applyneighborhood1": { + "process_id": "apply_neighborhood", + "arguments": { + "data": { + "from_parameter": "input_raster_cube" + }, + "overlap": [], + "process": { + "process_graph": { + "runudf1": { + "process_id": "run_udf", + "arguments": { + "context": {}, + "data": { + "from_parameter": "data" + }, + "runtime": "Python", + "udf": "import os\nimport sys\nimport zipfile\nimport requests\nimport functools\nfrom typing import Union\nfrom pathlib import Path\n\nfrom openeo.udf import inspect\n\n\n# Example constants for demonstration\nDEPENDENCIES_DIR1 = 'venv'\nDEPENDENCIES_DIR2 = 'venv_static'\n\nDEPENDENCIES_URL1 = \"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\"\nDEPENDENCIES_URL2 = \"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip\"\n\n\ndef download_file(url, path):\n \"\"\"\n Downloads a 
file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\n\ndef extract_zip(zip_path, extract_to):\n \"\"\"\n Extracts a zip file from zip_path to the specified extract_to directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(extract_to)\n os.remove(zip_path) # Clean up the zip file after extraction\n\n\ndef add_directory_to_sys_path(directory):\n \"\"\"\n Adds a directory to the Python sys.path if it's not already present.\n \"\"\"\n if directory not in sys.path:\n sys.path.insert(0, directory)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url,DEPENDENCIES_DIR):\n \"\"\"\n Main function to set up the dependencies by downloading, extracting,\n and adding necessary directories to sys.path.\n \"\"\"\n\n inspect(message=\"Create directories\")\n # Ensure base directories exist\n os.makedirs(DEPENDENCIES_DIR, exist_ok=True)\n\n # Download and extract dependencies if not already present\n if not os.listdir(DEPENDENCIES_DIR):\n\n inspect(message=\"Extract dependencies\")\n zip_path = os.path.join(DEPENDENCIES_DIR, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n extract_zip(zip_path, DEPENDENCIES_DIR)\n\n # Add the extracted dependencies directory to sys.path\n add_directory_to_sys_path(DEPENDENCIES_DIR)\n inspect(message=\"Added to the sys path\")\n\nsetup_dependencies(DEPENDENCIES_URL1, DEPENDENCIES_DIR1)\nsetup_dependencies(DEPENDENCIES_URL2, DEPENDENCIES_DIR2)\n\n\ndef load_set_path() -> str:\n \"\"\"\n loads path setup functions \n @return:\n \"\"\"\n import os\n\n return Path(os.path.realpath(__file__)).read_text()\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` 
exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" + }, + "result": true + } + } + }, + "size": [ + { + "dimension": "x", + "value": 32, + "unit": "px" + }, + { + "dimension": "y", + "value": 32, + "unit": "px" + } + ] + }, + "result": true + } + }, + "id": "fusets_mogpr", + "summary": "Integrates timeseries in data cube using multi-output gaussian process regression", + "description": "# Multi output gaussian process regression\n\n## Description\n\nCompute an integrated timeseries based on multiple inputs.\nFor instance, combine Sentinel-2 NDVI with Sentinel-1 RVI into one integrated NDVI.\n\n## Limitations\n\nThe 
spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Configuration & Resource Usage\n\nRun configurations for different ROI/TOI with memory requirements and estimated run durations.\n\n### Synchronous calls\n\nTODO: Replace with actual measurements!!!\n\n| Spatial extent | Run duration |\n|----------------|--------------|\n| 100 m x 100 m | 1 minute |\n| 500m x 500 m | 1 minute |\n| 1 km x 1 km | 1 minute |\n| 5 km x 5 km | 2 minutes |\n| 10 km x 10 km | 3 minutes |\n| 50 km x 50 km | 9 minutes |\n\nThe maximum duration of a synchronous run is 15 minutes.\nFor long running computations, you can use batch jobs.\n\n### Batch jobs\n\nTODO: Replace with actual measurements!!!\n\n| Spatial extent | Temporal extent | Executor memory | Run duration |\n|-----------------|-----------------|-----------------|--------------|\n| 100 m x 100 m | 1 month | default | 7 minutes |\n| 500 m x 100 m | 1 month | default | 7 minutes |\n| 1 km x 1 km | 1 month | default | 7 minutes |\n| 5 km x 5 km | 1 month | default | 10 minutes |\n| 10 km x 10 km | 1 month | default | 11 minutes |\n| 50 km x 50 km | 1 month | 6 GB | 20 minutes |\n| 100 km x 100 km | 1 month | 7 GB | 34 minutes |\n| 100m x 100 m | 7 months | default | 10 minutes |\n| 500 m x 500 m | 7 months | default | 10 minutes |\n| 1 km x 1 km | 7 months | default | 14 minutes |\n| 5 km x 5 km | 7 months | default | 14 minutes |\n| 10 km x 10 km | 7 months | default | 19 minutes |\n| 50 km x 50 km | 7 months | 6 GB | 45 minutes |\n| 100 km x 100 km | 7 months | 8 GB | 65 minutes |\n", + "parameters": [ + { + "name": "input_raster_cube", + "description": "Raster cube for which to calculate the peaks and valleys", + "schema": { + "type": "object", + "subtype": "datacube" + } + } + ] +} \ No newline at end of file diff --git a/openeo_udp/mogpr/generate.py b/openeo_udp/fusets_mogpr/generate.py similarity index 96% rename from openeo_udp/mogpr/generate.py rename to 
openeo_udp/fusets_mogpr/generate.py index a852a91e..922a26a1 100644 --- a/openeo_udp/mogpr/generate.py +++ b/openeo_udp/fusets_mogpr/generate.py @@ -49,5 +49,5 @@ def generate() -> dict: if __name__ == "__main__": # save the generated process to a file - with open(Path(__file__).parent / "mogpr.json", "w") as f: + with open(Path(__file__).parent / "fusets_mogpr.json", "w") as f: json.dump(generate(), f, indent=2) diff --git a/openeo_udp/mogpr/set_path.py b/openeo_udp/fusets_mogpr/set_path.py similarity index 100% rename from openeo_udp/mogpr/set_path.py rename to openeo_udp/fusets_mogpr/set_path.py From b2f3b80a08753f78b8acd347038d309fc45f52b9 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:23:02 +0100 Subject: [PATCH 04/26] algorithm catalog --- algorithm_catalog/fusets_mogpr.json | 136 ++++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 algorithm_catalog/fusets_mogpr.json diff --git a/algorithm_catalog/fusets_mogpr.json b/algorithm_catalog/fusets_mogpr.json new file mode 100644 index 00000000..1990a742 --- /dev/null +++ b/algorithm_catalog/fusets_mogpr.json @@ -0,0 +1,136 @@ +{ + "id": "fusets_mogpr", + "type": "Feature", + "conformsTo": [ + "http://www.opengis.net/spec/ogcapi-records-1/1.0/req/record-core" + ], + "geometry": null, + "properties": { + "created": "2025-01-093T00:00:00Z", + "updated": "2025-01-09T00:00:00Z", + "type": "apex_algorithm", + "title": "Multi output gaussian process regression", + "description": "Integrates timeseries in data cube using multi-output gaussian process regression. The service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. 
It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other indicators that are correlated with each other.", + "cost_estimate": 12, + "cost_unit": "platform credits per km²", + "keywords": [ + "timeseries", + "Gaussian Process Regression (GPR)" + ], + "language": { + "code": "en-US", + "name": "English (United States)" + }, + "languages": [ + { + "code": "en-US", + "name": "English (United States)" + } + ], + "contacts": [ + { + "name": "Bram Janssen", + "position": "Researcher", + "organization": "VITO", + "links": [ + { + "href": "https://www.vito.be/", + "rel": "about", + "type": "text/html" + }, + { + "href": "https://github.com/JanssenBrm", + "rel": "about", + "type": "text/html" + } + ], + "contactInstructions": "Contact via VITO", + "roles": [ + "principal investigator" + ] + }, + { + "name": "Pratichhya Sharma", + "position": "Researcher", + "organization": "VITO", + "links": [ + { + "href": "https://www.vito.be/", + "rel": "about", + "type": "text/html" + }, + { + "href": "https://github.com/Pratichhya", + "rel": "about", + "type": "text/html" + } + ], + "contactInstructions": "Contact via VITO", + "roles": [ + "service provider" + ] + }, + { + "name": "VITO", + "links": [ + { + "href": "https://www.vito.be/", + "rel": "about", + "type": "text/html" + } + ], + "contactInstructions": "SEE WEBSITE", + "roles": [ + "processor" + ] + } + ], + "themes": [ + { + "concepts": [ + { + "id": "Normalised vegetation difference index (NDVI)" + }, + { + "id": "Radar Vegetation Index (RVI)" + }, + { + "id": "Multi-output Gaussian Process Regression (MOGPR)" + } + ], + "scheme": "https://gcmd.earthdata.nasa.gov/kms/concepts/concept_scheme/sciencekeywords" + } + ], + "formats": [ + { + "name": "JSON" + } + ], + "license": "other" + }, + "linkTemplates": [], + "links": [ + { + "rel": "openeo-process", + "type": "application/json", + "title": "openEO Process Definition", + "href": 
"https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + { + "rel": "service", + "type": "application/json", + "title": "CDSE openEO federation", + "href": "https://openeofed.dataspace.copernicus.eu" + }, + { + "rel": "license", + "href": "https://apex.esa.int/license" + }, + { + "rel": "example", + "type": "application/json", + "title": "Example output", + "href": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.json" + } + ] +} \ No newline at end of file From 625c67ef63e28645407dfb0144b0300c7fbe6f99 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 9 Jan 2025 13:44:09 +0100 Subject: [PATCH 05/26] benchmark scenario --- benchmark_scenarios/fusets_mogpr.json | 182 ++++++++++++++++++++++++++ 1 file changed, 182 insertions(+) create mode 100644 benchmark_scenarios/fusets_mogpr.json diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json new file mode 100644 index 00000000..c0b9e6dd --- /dev/null +++ b/benchmark_scenarios/fusets_mogpr.json @@ -0,0 +1,182 @@ +[ + { + "id": "fusets_mogpr", + "type": "openeo", + "description": "Multi output gaussian process regression example on NDVI timeseries", + "backend": "openeofed.dataspace.copernicus.eu", + "process_graph": { + "aggregatespatial1": { + "arguments": { + "data": { + "from_node": "mogpr1" + }, + "geometries": { + "coordinates": [ + [ + [ + 5.170012098271149, + 51.25062964728295 + ], + [ + 5.17085904378298, + 51.24882567194015 + ], + [ + 5.17857421368097, + 51.2468515482926 + ], + [ + 5.178972704726344, + 51.24982704376254 + ], + [ + 5.170012098271149, + 51.25062964728295 + ] + ] + ], + "type": "Polygon" + }, + "reducer": { + "process_graph": { + "mean1": { + "arguments": { + "data": { + "from_parameter": "data" + } + }, + "process_id": "mean", + "result": true + } + } + } + }, + "process_id": "aggregate_spatial" + }, + 
"loadcollection1": { + "arguments": { + "bands": [ + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "coordinates": [ + [ + [ + 5.170012098271149, + 51.25062964728295 + ], + [ + 5.17085904378298, + 51.24882567194015 + ], + [ + 5.17857421368097, + 51.2468515482926 + ], + [ + 5.178972704726344, + 51.24982704376254 + ], + [ + 5.170012098271149, + 51.25062964728295 + ] + ] + ], + "type": "Polygon" + }, + "temporal_extent": [ + "2022-05-01", + "2023-07-31" + ] + }, + "process_id": "load_collection" + }, + "loadcollection2": { + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "coordinates": [ + [ + [ + 5.170012098271149, + 51.25062964728295 + ], + [ + 5.17085904378298, + 51.24882567194015 + ], + [ + 5.17857421368097, + 51.2468515482926 + ], + [ + 5.178972704726344, + 51.24982704376254 + ], + [ + 5.170012098271149, + 51.25062964728295 + ] + ] + ], + "type": "Polygon" + }, + "temporal_extent": [ + "2022-05-01", + "2023-07-31" + ] + }, + "process_id": "load_collection" + }, + "mask1": { + "arguments": { + "data": { + "from_node": "loadcollection1" + }, + "mask": { + "from_node": "toscldilationmask1" + } + }, + "process_id": "mask" + }, + "mogpr1": { + "arguments": { + "input_raster_cube": { + "from_node": "ndvi1" + } + }, + "namespace": "https://openeo.dataspace.copernicus.eu/openeo/1.2/processes/u:3e24e251-2e9a-438f-90a9-d4500e576574/mogpr", + "process_id": "mogpr" + }, + "ndvi1": { + "arguments": { + "data": { + "from_node": "mask1" + }, + "nir": "B08", + "red": "B04" + }, + "process_id": "ndvi" + }, + "toscldilationmask1": { + "arguments": { + "data": { + "from_node": "loadcollection2" + } + }, + "process_id": "to_scl_dilation_mask" + } + }, + "reference_data": { + "job-results.json": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/job-results.json", + "timeseries.json": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.json" + } + } + ] + \ No newline at 
end of file From bb8e7fb20bae1881c008fb8d1b9b351a1a31e771 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:08:17 +0100 Subject: [PATCH 06/26] updated namespace --- algorithm_catalog/fusets_mogpr.json | 2 +- benchmark_scenarios/fusets_mogpr.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/algorithm_catalog/fusets_mogpr.json b/algorithm_catalog/fusets_mogpr.json index 1990a742..309a5120 100644 --- a/algorithm_catalog/fusets_mogpr.json +++ b/algorithm_catalog/fusets_mogpr.json @@ -114,7 +114,7 @@ "rel": "openeo-process", "type": "application/json", "title": "openEO Process Definition", - "href": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + "href": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json" }, { "rel": "service", diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json index c0b9e6dd..21006d3d 100644 --- a/benchmark_scenarios/fusets_mogpr.json +++ b/benchmark_scenarios/fusets_mogpr.json @@ -151,7 +151,7 @@ "from_node": "ndvi1" } }, - "namespace": "https://openeo.dataspace.copernicus.eu/openeo/1.2/processes/u:3e24e251-2e9a-438f-90a9-d4500e576574/mogpr", + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json", "process_id": "mogpr" }, "ndvi1": { From f837234624a2e74cdae8945bba599f8ea4a92ccc Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:09:00 +0100 Subject: [PATCH 07/26] preetify json --- algorithm_catalog/fusets_mogpr.json | 10 +- benchmark_scenarios/fusets_mogpr.json | 349 +++++++++++----------- openeo_udp/fusets_mogpr/fusets_mogpr.json | 100 +++---- 3 files changed, 229 insertions(+), 230 deletions(-) diff --git 
a/algorithm_catalog/fusets_mogpr.json b/algorithm_catalog/fusets_mogpr.json index 309a5120..0c4ecc41 100644 --- a/algorithm_catalog/fusets_mogpr.json +++ b/algorithm_catalog/fusets_mogpr.json @@ -103,7 +103,7 @@ ], "formats": [ { - "name": "JSON" + "name": "JSON" } ], "license": "other" @@ -127,10 +127,10 @@ "href": "https://apex.esa.int/license" }, { - "rel": "example", - "type": "application/json", - "title": "Example output", - "href": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.json" + "rel": "example", + "type": "application/json", + "title": "Example output", + "href": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.json" } ] } \ No newline at end of file diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json index 21006d3d..00792bd0 100644 --- a/benchmark_scenarios/fusets_mogpr.json +++ b/benchmark_scenarios/fusets_mogpr.json @@ -1,182 +1,181 @@ [ { - "id": "fusets_mogpr", - "type": "openeo", - "description": "Multi output gaussian process regression example on NDVI timeseries", - "backend": "openeofed.dataspace.copernicus.eu", - "process_graph": { - "aggregatespatial1": { - "arguments": { - "data": { - "from_node": "mogpr1" - }, - "geometries": { - "coordinates": [ - [ - [ - 5.170012098271149, - 51.25062964728295 - ], - [ - 5.17085904378298, - 51.24882567194015 - ], - [ - 5.17857421368097, - 51.2468515482926 - ], - [ - 5.178972704726344, - 51.24982704376254 - ], - [ - 5.170012098271149, - 51.25062964728295 - ] - ] - ], - "type": "Polygon" - }, - "reducer": { - "process_graph": { - "mean1": { + "id": "fusets_mogpr", + "type": "openeo", + "description": "Multi output gaussian process regression example on NDVI timeseries", + "backend": "openeofed.dataspace.copernicus.eu", + "process_graph": { + "aggregatespatial1": { "arguments": { - "data": { - "from_parameter": "data" - } + "data": { + "from_node": "mogpr1" + }, + "geometries": { + "coordinates": [ + [ 
+ [ + 5.170012098271149, + 51.25062964728295 + ], + [ + 5.17085904378298, + 51.24882567194015 + ], + [ + 5.17857421368097, + 51.2468515482926 + ], + [ + 5.178972704726344, + 51.24982704376254 + ], + [ + 5.170012098271149, + 51.25062964728295 + ] + ] + ], + "type": "Polygon" + }, + "reducer": { + "process_graph": { + "mean1": { + "arguments": { + "data": { + "from_parameter": "data" + } + }, + "process_id": "mean", + "result": true + } + } + } }, - "process_id": "mean", - "result": true - } + "process_id": "aggregate_spatial" + }, + "loadcollection1": { + "arguments": { + "bands": [ + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "coordinates": [ + [ + [ + 5.170012098271149, + 51.25062964728295 + ], + [ + 5.17085904378298, + 51.24882567194015 + ], + [ + 5.17857421368097, + 51.2468515482926 + ], + [ + 5.178972704726344, + 51.24982704376254 + ], + [ + 5.170012098271149, + 51.25062964728295 + ] + ] + ], + "type": "Polygon" + }, + "temporal_extent": [ + "2022-05-01", + "2023-07-31" + ] + }, + "process_id": "load_collection" + }, + "loadcollection2": { + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "coordinates": [ + [ + [ + 5.170012098271149, + 51.25062964728295 + ], + [ + 5.17085904378298, + 51.24882567194015 + ], + [ + 5.17857421368097, + 51.2468515482926 + ], + [ + 5.178972704726344, + 51.24982704376254 + ], + [ + 5.170012098271149, + 51.25062964728295 + ] + ] + ], + "type": "Polygon" + }, + "temporal_extent": [ + "2022-05-01", + "2023-07-31" + ] + }, + "process_id": "load_collection" + }, + "mask1": { + "arguments": { + "data": { + "from_node": "loadcollection1" + }, + "mask": { + "from_node": "toscldilationmask1" + } + }, + "process_id": "mask" + }, + "mogpr1": { + "arguments": { + "input_raster_cube": { + "from_node": "ndvi1" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json", + "process_id": "mogpr" + 
}, + "ndvi1": { + "arguments": { + "data": { + "from_node": "mask1" + }, + "nir": "B08", + "red": "B04" + }, + "process_id": "ndvi" + }, + "toscldilationmask1": { + "arguments": { + "data": { + "from_node": "loadcollection2" + } + }, + "process_id": "to_scl_dilation_mask" } - } - }, - "process_id": "aggregate_spatial" - }, - "loadcollection1": { - "arguments": { - "bands": [ - "B04", - "B08" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "coordinates": [ - [ - [ - 5.170012098271149, - 51.25062964728295 - ], - [ - 5.17085904378298, - 51.24882567194015 - ], - [ - 5.17857421368097, - 51.2468515482926 - ], - [ - 5.178972704726344, - 51.24982704376254 - ], - [ - 5.170012098271149, - 51.25062964728295 - ] - ] - ], - "type": "Polygon" - }, - "temporal_extent": [ - "2022-05-01", - "2023-07-31" - ] - }, - "process_id": "load_collection" - }, - "loadcollection2": { - "arguments": { - "bands": [ - "SCL" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "coordinates": [ - [ - [ - 5.170012098271149, - 51.25062964728295 - ], - [ - 5.17085904378298, - 51.24882567194015 - ], - [ - 5.17857421368097, - 51.2468515482926 - ], - [ - 5.178972704726344, - 51.24982704376254 - ], - [ - 5.170012098271149, - 51.25062964728295 - ] - ] - ], - "type": "Polygon" - }, - "temporal_extent": [ - "2022-05-01", - "2023-07-31" - ] - }, - "process_id": "load_collection" - }, - "mask1": { - "arguments": { - "data": { - "from_node": "loadcollection1" - }, - "mask": { - "from_node": "toscldilationmask1" - } - }, - "process_id": "mask" - }, - "mogpr1": { - "arguments": { - "input_raster_cube": { - "from_node": "ndvi1" - } - }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json", - "process_id": "mogpr" - }, - "ndvi1": { - "arguments": { - "data": { - "from_node": "mask1" - }, - "nir": "B08", - "red": "B04" - }, - "process_id": "ndvi" - }, - "toscldilationmask1": { - "arguments": { - "data": { - "from_node": 
"loadcollection2" - } }, - "process_id": "to_scl_dilation_mask" - } - }, - "reference_data": { - "job-results.json": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/job-results.json", - "timeseries.json": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.json" - } + "reference_data": { + "job-results.json": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/job-results.json", + "timeseries.json": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.json" + } } - ] - \ No newline at end of file +] \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json index ac1c8374..41a7dd98 100644 --- a/openeo_udp/fusets_mogpr/fusets_mogpr.json +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -1,55 +1,55 @@ { - "process_graph": { - "applyneighborhood1": { - "process_id": "apply_neighborhood", - "arguments": { - "data": { - "from_parameter": "input_raster_cube" - }, - "overlap": [], - "process": { - "process_graph": { - "runudf1": { - "process_id": "run_udf", - "arguments": { - "context": {}, + "process_graph": { + "applyneighborhood1": { + "process_id": "apply_neighborhood", + "arguments": { "data": { - "from_parameter": "data" + "from_parameter": "input_raster_cube" }, - "runtime": "Python", - "udf": "import os\nimport sys\nimport zipfile\nimport requests\nimport functools\nfrom typing import Union\nfrom pathlib import Path\n\nfrom openeo.udf import inspect\n\n\n# Example constants for demonstration\nDEPENDENCIES_DIR1 = 'venv'\nDEPENDENCIES_DIR2 = 'venv_static'\n\nDEPENDENCIES_URL1 = \"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\"\nDEPENDENCIES_URL2 = \"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip\"\n\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = 
requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\n\ndef extract_zip(zip_path, extract_to):\n \"\"\"\n Extracts a zip file from zip_path to the specified extract_to directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(extract_to)\n os.remove(zip_path) # Clean up the zip file after extraction\n\n\ndef add_directory_to_sys_path(directory):\n \"\"\"\n Adds a directory to the Python sys.path if it's not already present.\n \"\"\"\n if directory not in sys.path:\n sys.path.insert(0, directory)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url,DEPENDENCIES_DIR):\n \"\"\"\n Main function to set up the dependencies by downloading, extracting,\n and adding necessary directories to sys.path.\n \"\"\"\n\n inspect(message=\"Create directories\")\n # Ensure base directories exist\n os.makedirs(DEPENDENCIES_DIR, exist_ok=True)\n\n # Download and extract dependencies if not already present\n if not os.listdir(DEPENDENCIES_DIR):\n\n inspect(message=\"Extract dependencies\")\n zip_path = os.path.join(DEPENDENCIES_DIR, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n extract_zip(zip_path, DEPENDENCIES_DIR)\n\n # Add the extracted dependencies directory to sys.path\n add_directory_to_sys_path(DEPENDENCIES_DIR)\n inspect(message=\"Added to the sys path\")\n\nsetup_dependencies(DEPENDENCIES_URL1, DEPENDENCIES_DIR1)\nsetup_dependencies(DEPENDENCIES_URL2, DEPENDENCIES_DIR2)\n\n\ndef load_set_path() -> str:\n \"\"\"\n loads path setup functions \n @return:\n \"\"\"\n import os\n\n return Path(os.path.realpath(__file__)).read_text()\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 
'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" - }, - "result": true + "overlap": [], + "process": { + "process_graph": { + "runudf1": { + "process_id": "run_udf", + "arguments": { + "context": {}, + "data": { + "from_parameter": "data" + }, + "runtime": "Python", + "udf": "import os\nimport sys\nimport zipfile\nimport requests\nimport functools\nfrom typing import Union\nfrom pathlib import Path\n\nfrom openeo.udf import inspect\n\n\n# Example constants for demonstration\nDEPENDENCIES_DIR1 = 'venv'\nDEPENDENCIES_DIR2 = 'venv_static'\n\nDEPENDENCIES_URL1 = \"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\"\nDEPENDENCIES_URL2 = 
\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip\"\n\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\n\ndef extract_zip(zip_path, extract_to):\n \"\"\"\n Extracts a zip file from zip_path to the specified extract_to directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(extract_to)\n os.remove(zip_path) # Clean up the zip file after extraction\n\n\ndef add_directory_to_sys_path(directory):\n \"\"\"\n Adds a directory to the Python sys.path if it's not already present.\n \"\"\"\n if directory not in sys.path:\n sys.path.insert(0, directory)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url,DEPENDENCIES_DIR):\n \"\"\"\n Main function to set up the dependencies by downloading, extracting,\n and adding necessary directories to sys.path.\n \"\"\"\n\n inspect(message=\"Create directories\")\n # Ensure base directories exist\n os.makedirs(DEPENDENCIES_DIR, exist_ok=True)\n\n # Download and extract dependencies if not already present\n if not os.listdir(DEPENDENCIES_DIR):\n\n inspect(message=\"Extract dependencies\")\n zip_path = os.path.join(DEPENDENCIES_DIR, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n extract_zip(zip_path, DEPENDENCIES_DIR)\n\n # Add the extracted dependencies directory to sys.path\n add_directory_to_sys_path(DEPENDENCIES_DIR)\n inspect(message=\"Added to the sys path\")\n\nsetup_dependencies(DEPENDENCIES_URL1, DEPENDENCIES_DIR1)\nsetup_dependencies(DEPENDENCIES_URL2, DEPENDENCIES_DIR2)\n\n\ndef load_set_path() -> str:\n \"\"\"\n loads path setup functions \n @return:\n \"\"\"\n import os\n\n return Path(os.path.realpath(__file__)).read_text()\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom 
openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" + }, + "result": true + } + } + }, + "size": [ + { + "dimension": "x", + "value": 32, + "unit": "px" + }, + { + "dimension": "y", + "value": 32, + "unit": "px" + } + ] + }, + "result": true + } + }, + "id": "fusets_mogpr", + "summary": "Integrates timeseries in data cube using multi-output gaussian process regression", + "description": "# Multi output gaussian process regression\n\n## Description\n\nCompute an integrated 
timeseries based on multiple inputs.\nFor instance, combine Sentinel-2 NDVI with Sentinel-1 RVI into one integrated NDVI.\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Configuration & Resource Usage\n\nRun configurations for different ROI/TOI with memory requirements and estimated run durations.\n\n### Synchronous calls\n\nTODO: Replace with actual measurements!!!\n\n| Spatial extent | Run duration |\n|----------------|--------------|\n| 100 m x 100 m | 1 minute |\n| 500m x 500 m | 1 minute |\n| 1 km x 1 km | 1 minute |\n| 5 km x 5 km | 2 minutes |\n| 10 km x 10 km | 3 minutes |\n| 50 km x 50 km | 9 minutes |\n\nThe maximum duration of a synchronous run is 15 minutes.\nFor long running computations, you can use batch jobs.\n\n### Batch jobs\n\nTODO: Replace with actual measurements!!!\n\n| Spatial extent | Temporal extent | Executor memory | Run duration |\n|-----------------|-----------------|-----------------|--------------|\n| 100 m x 100 m | 1 month | default | 7 minutes |\n| 500 m x 100 m | 1 month | default | 7 minutes |\n| 1 km x 1 km | 1 month | default | 7 minutes |\n| 5 km x 5 km | 1 month | default | 10 minutes |\n| 10 km x 10 km | 1 month | default | 11 minutes |\n| 50 km x 50 km | 1 month | 6 GB | 20 minutes |\n| 100 km x 100 km | 1 month | 7 GB | 34 minutes |\n| 100m x 100 m | 7 months | default | 10 minutes |\n| 500 m x 500 m | 7 months | default | 10 minutes |\n| 1 km x 1 km | 7 months | default | 14 minutes |\n| 5 km x 5 km | 7 months | default | 14 minutes |\n| 10 km x 10 km | 7 months | default | 19 minutes |\n| 50 km x 50 km | 7 months | 6 GB | 45 minutes |\n| 100 km x 100 km | 7 months | 8 GB | 65 minutes |\n", + "parameters": [ + { + "name": "input_raster_cube", + "description": "Raster cube for which to calculate the peaks and valleys", + "schema": { + "type": "object", + "subtype": "datacube" } - } - }, - "size": [ - { - "dimension": "x", - "value": 32, - "unit": 
"px" - }, - { - "dimension": "y", - "value": 32, - "unit": "px" - } - ] - }, - "result": true - } - }, - "id": "fusets_mogpr", - "summary": "Integrates timeseries in data cube using multi-output gaussian process regression", - "description": "# Multi output gaussian process regression\n\n## Description\n\nCompute an integrated timeseries based on multiple inputs.\nFor instance, combine Sentinel-2 NDVI with Sentinel-1 RVI into one integrated NDVI.\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Configuration & Resource Usage\n\nRun configurations for different ROI/TOI with memory requirements and estimated run durations.\n\n### Synchronous calls\n\nTODO: Replace with actual measurements!!!\n\n| Spatial extent | Run duration |\n|----------------|--------------|\n| 100 m x 100 m | 1 minute |\n| 500m x 500 m | 1 minute |\n| 1 km x 1 km | 1 minute |\n| 5 km x 5 km | 2 minutes |\n| 10 km x 10 km | 3 minutes |\n| 50 km x 50 km | 9 minutes |\n\nThe maximum duration of a synchronous run is 15 minutes.\nFor long running computations, you can use batch jobs.\n\n### Batch jobs\n\nTODO: Replace with actual measurements!!!\n\n| Spatial extent | Temporal extent | Executor memory | Run duration |\n|-----------------|-----------------|-----------------|--------------|\n| 100 m x 100 m | 1 month | default | 7 minutes |\n| 500 m x 100 m | 1 month | default | 7 minutes |\n| 1 km x 1 km | 1 month | default | 7 minutes |\n| 5 km x 5 km | 1 month | default | 10 minutes |\n| 10 km x 10 km | 1 month | default | 11 minutes |\n| 50 km x 50 km | 1 month | 6 GB | 20 minutes |\n| 100 km x 100 km | 1 month | 7 GB | 34 minutes |\n| 100m x 100 m | 7 months | default | 10 minutes |\n| 500 m x 500 m | 7 months | default | 10 minutes |\n| 1 km x 1 km | 7 months | default | 14 minutes |\n| 5 km x 5 km | 7 months | default | 14 minutes |\n| 10 km x 10 km | 7 months | default | 19 minutes |\n| 50 km x 50 km | 7 months | 6 GB | 
45 minutes |\n| 100 km x 100 km | 7 months | 8 GB | 65 minutes |\n", - "parameters": [ - { - "name": "input_raster_cube", - "description": "Raster cube for which to calculate the peaks and valleys", - "schema": { - "type": "object", - "subtype": "datacube" - } - } - ] + } + ] } \ No newline at end of file From e37b8471750a3758d2ae841958260f77654880ac Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 9 Jan 2025 14:10:58 +0100 Subject: [PATCH 08/26] ruff checked --- openeo_udp/fusets_mogpr/generate.py | 3 --- openeo_udp/fusets_mogpr/set_path.py | 1 - 2 files changed, 4 deletions(-) diff --git a/openeo_udp/fusets_mogpr/generate.py b/openeo_udp/fusets_mogpr/generate.py index 922a26a1..8e31fa43 100644 --- a/openeo_udp/fusets_mogpr/generate.py +++ b/openeo_udp/fusets_mogpr/generate.py @@ -3,7 +3,6 @@ from set_path import load_set_path from typing import Union -import openeo from openeo import DataCube from openeo.api.process import Parameter from openeo.processes import ProcessBuilder, apply_neighborhood @@ -24,8 +23,6 @@ def get_mogpr( def generate() -> dict: - connection = openeo.connect("openeofed.dataspace.copernicus.eu") - # define parameters input_cube = Parameter.datacube( name="input_raster_cube", diff --git a/openeo_udp/fusets_mogpr/set_path.py b/openeo_udp/fusets_mogpr/set_path.py index 686d48b3..6fa803c5 100644 --- a/openeo_udp/fusets_mogpr/set_path.py +++ b/openeo_udp/fusets_mogpr/set_path.py @@ -3,7 +3,6 @@ import zipfile import requests import functools -from typing import Union from pathlib import Path from openeo.udf import inspect From de8fbb1adedecdfe854cce935063620c33e73eac Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Tue, 14 Jan 2025 13:37:17 +0100 Subject: [PATCH 09/26] updated the README.md file --- openeo_udp/fusets_mogpr/README.md | 108 +++++++++++++++++++----------- 1 file changed, 70 insertions(+), 38 deletions(-) diff --git 
a/openeo_udp/fusets_mogpr/README.md b/openeo_udp/fusets_mogpr/README.md index 2f2b14c9..af4fdfff 100644 --- a/openeo_udp/fusets_mogpr/README.md +++ b/openeo_udp/fusets_mogpr/README.md @@ -1,51 +1,83 @@ -# Multi output gaussian process regression +# Multi-output Gaussian process regression (MOGPR) -## Description +The MOGPR service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. -Compute an integrated timeseries based on multiple inputs. -For instance, combine Sentinel-2 NDVI with Sentinel-1 RVI into one integrated NDVI. +## Parameters -## Limitations +The MOGPR service requires the following parameters: -The spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km). +- `datacube`: The input datacube that contains the data to be gap-filled. -## Configuration & Resource Usage +## Usage -Run configurations for different ROI/TOI with memory requirements and estimated run durations. +The MOGPR service can be used as follows: -### Synchronous calls +```python -TODO: Replace with actual measurements!!! 
+import openeo -| Spatial extent | Run duration | -|----------------|--------------| -| 100 m x 100 m | 1 minute | -| 500m x 500 m | 1 minute | -| 1 km x 1 km | 1 minute | -| 5 km x 5 km | 2 minutes | -| 10 km x 10 km | 3 minutes | -| 50 km x 50 km | 9 minutes | +## Setup of parameters +spat_ext = { +    "type": "Polygon", +    "coordinates": [ + [ + [ +                5.170012098271149, +                51.25062964728295 + ], + [ +                5.17085904378298, +                51.24882567194015 + ], + [ +                5.17857421368097, +                51.2468515482926 + ], + [ +                5.178972704726344, +                51.24982704376254 + ], + [ +                5.170012098271149, +                51.25062964728295 + ] + ] + ] +} +temp_ext = ["2022-05-01", "2023-07-31"] -The maximum duration of a synchronous run is 15 minutes. -For long running computations, you can use batch jobs. +## Setup connection to openEO +eoconn = openeo.connect( +        "openeo.dataspace.copernicus.eu" + ).authenticate_oidc("CDSE") -### Batch jobs +## Create a base NDVI datacube that can be used as input for the service +base = eoconn.load_collection('SENTINEL2_L2A', +                                  spatial_extent=spat_ext, +                                  temporal_extent=temp_ext, +                                  bands=["B04", "B08", "SCL"]) +mask = scl.process("to_scl_dilation_mask", data=scl) +base_cloudmasked = base.mask(mask) +base_ndvi = base_cloudmasked.ndvi(red="B04", nir="B08") -TODO: Replace with actual measurements!!! 
+process_id = "fusets_mogpr" +namespace_url = "public_url"    # publised URL of the process +## Create a processing graph from the MOGPR process using an active openEO connection +mogpr = eoconn.datacube_from_process( +       process_id=process_id, +       namespace= namespace_url, +       input_raster_cube=base_ndvi, + ) -| Spatial extent | Temporal extent | Executor memory | Run duration | -|-----------------|-----------------|-----------------|--------------| -| 100 m x 100 m | 1 month | default | 7 minutes | -| 500 m x 100 m | 1 month | default | 7 minutes | -| 1 km x 1 km | 1 month | default | 7 minutes | -| 5 km x 5 km | 1 month | default | 10 minutes | -| 10 km x 10 km | 1 month | default | 11 minutes | -| 50 km x 50 km | 1 month | 6 GB | 20 minutes | -| 100 km x 100 km | 1 month | 7 GB | 34 minutes | -| 100m x 100 m | 7 months | default | 10 minutes | -| 500 m x 500 m | 7 months | default | 10 minutes | -| 1 km x 1 km | 7 months | default | 14 minutes | -| 5 km x 5 km | 7 months | default | 14 minutes | -| 10 km x 10 km | 7 months | default | 19 minutes | -| 50 km x 50 km | 7 months | 6 GB | 45 minutes | -| 100 km x 100 km | 7 months | 8 GB | 65 minutes | + +## Calculate the average time series value for the given area of interest +mogpr = mogpr.aggregate_spatial(spat_ext, reducer='mean') + +# Execute the service as a batch process +mogpr_job = mogpr.execute_batch('./mogpr.json', out_format="json", title=f'FuseTS - MOGPR') + +``` + +## Output + +The User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes. 
\ No newline at end of file From 29f1497f47f06093f07978e3db18cb1caf562439 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Tue, 14 Jan 2025 14:20:44 +0100 Subject: [PATCH 10/26] updated requirement txt --- openeo_udp/fusets_mogpr/README.md | 2 +- openeo_udp/fusets_mogpr/generate.py | 4 ++-- qa/unittests/requirements.txt | 1 + 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/openeo_udp/fusets_mogpr/README.md b/openeo_udp/fusets_mogpr/README.md index af4fdfff..be62efa1 100644 --- a/openeo_udp/fusets_mogpr/README.md +++ b/openeo_udp/fusets_mogpr/README.md @@ -66,7 +66,7 @@ namespace_url = "public_url"    # publised URL of the process mogpr = eoconn.datacube_from_process(        process_id=process_id,        namespace= namespace_url, -       input_raster_cube=base_ndvi, +      data=base_ndvi, ) diff --git a/openeo_udp/fusets_mogpr/generate.py b/openeo_udp/fusets_mogpr/generate.py index 8e31fa43..72a4979e 100644 --- a/openeo_udp/fusets_mogpr/generate.py +++ b/openeo_udp/fusets_mogpr/generate.py @@ -25,7 +25,7 @@ def get_mogpr( def generate() -> dict: # define parameters input_cube = Parameter.datacube( - name="input_raster_cube", + name="data", description="Raster cube for which to calculate the peaks and valleys" ) @@ -46,5 +46,5 @@ def generate() -> dict: if __name__ == "__main__": # save the generated process to a file - with open(Path(__file__).parent / "fusets_mogpr.json", "w") as f: + with open(Path(__file__).parent / "fusets_mogpr2.json", "w") as f: json.dump(generate(), f, indent=2) diff --git a/qa/unittests/requirements.txt b/qa/unittests/requirements.txt index 01be1428..09eb03cb 100644 --- a/qa/unittests/requirements.txt +++ b/qa/unittests/requirements.txt @@ -3,3 +3,4 @@ git+https://github.com/ESA-APEx/esa-apex-toolbox-python.git@main pytest>=8.2.0 moto[s3, server]>=5.0.13 dirty-equals>=0.8.0 +fusets From 5d311b102d2ede2491dc075080170707e1b5dca9 Mon Sep 17 00:00:00 2001 From: Pratichhya 
<39898768+Pratichhya@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:07:01 +0100 Subject: [PATCH 11/26] addressed the suggested changes on the set_path function --- openeo_udp/fusets_mogpr/generate.py | 11 +++++------ openeo_udp/fusets_mogpr/set_path.py | 26 ++++---------------------- 2 files changed, 9 insertions(+), 28 deletions(-) diff --git a/openeo_udp/fusets_mogpr/generate.py b/openeo_udp/fusets_mogpr/generate.py index 72a4979e..a3e8b29e 100644 --- a/openeo_udp/fusets_mogpr/generate.py +++ b/openeo_udp/fusets_mogpr/generate.py @@ -1,6 +1,5 @@ import json from pathlib import Path -from set_path import load_set_path from typing import Union from openeo import DataCube @@ -9,16 +8,16 @@ from openeo.rest.udp import build_process_dict from fusets.openeo import load_mogpr_udf -from fusets.openeo.services.publish_mogpr import NEIGHBORHOOD_SIZE + def get_mogpr( input_cube: Union[DataCube, Parameter], ) -> ProcessBuilder: return apply_neighborhood(input_cube, - lambda data: data.run_udf(udf=load_set_path()+"\n"+load_mogpr_udf(), runtime='Python', context=dict()), + lambda data: data.run_udf(udf=Path("set_path.py").read_text()+"\n"+load_mogpr_udf(), runtime='Python', context=dict()), size=[ - {'dimension': 'x', 'value': NEIGHBORHOOD_SIZE, 'unit': 'px'}, - {'dimension': 'y', 'value': NEIGHBORHOOD_SIZE, 'unit': 'px'} + {'dimension': 'x', 'value': 32, 'unit': 'px'}, + {'dimension': 'y', 'value': 32, 'unit': 'px'} ], overlap=[]) @@ -46,5 +45,5 @@ def generate() -> dict: if __name__ == "__main__": # save the generated process to a file - with open(Path(__file__).parent / "fusets_mogpr2.json", "w") as f: + with open(Path(__file__).parent / "fusets_mogpr3.json", "w") as f: json.dump(generate(), f, indent=2) diff --git a/openeo_udp/fusets_mogpr/set_path.py b/openeo_udp/fusets_mogpr/set_path.py index 6fa803c5..c393ea58 100644 --- a/openeo_udp/fusets_mogpr/set_path.py +++ b/openeo_udp/fusets_mogpr/set_path.py @@ -8,14 +8,6 @@ from openeo.udf import inspect -# Example 
constants for demonstration -DEPENDENCIES_DIR1 = 'venv' -DEPENDENCIES_DIR2 = 'venv_static' - -DEPENDENCIES_URL1 = "https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip" -DEPENDENCIES_URL2 = "https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip" - - def download_file(url, path): """ Downloads a file from the given URL to the specified path. @@ -31,7 +23,6 @@ def extract_zip(zip_path, extract_to): """ with zipfile.ZipFile(zip_path, "r") as zip_ref: zip_ref.extractall(extract_to) - os.remove(zip_path) # Clean up the zip file after extraction def add_directory_to_sys_path(directory): @@ -39,7 +30,7 @@ def add_directory_to_sys_path(directory): Adds a directory to the Python sys.path if it's not already present. """ if directory not in sys.path: - sys.path.insert(0, directory) + sys.path.append(directory) @functools.lru_cache(maxsize=5) def setup_dependencies(dependencies_url,DEPENDENCIES_DIR): @@ -59,20 +50,11 @@ def setup_dependencies(dependencies_url,DEPENDENCIES_DIR): zip_path = os.path.join(DEPENDENCIES_DIR, "temp.zip") download_file(dependencies_url, zip_path) extract_zip(zip_path, DEPENDENCIES_DIR) + os.remove(zip_path) # Add the extracted dependencies directory to sys.path add_directory_to_sys_path(DEPENDENCIES_DIR) inspect(message="Added to the sys path") -setup_dependencies(DEPENDENCIES_URL1, DEPENDENCIES_DIR1) -setup_dependencies(DEPENDENCIES_URL2, DEPENDENCIES_DIR2) - - -def load_set_path() -> str: - """ - loads path setup functions - @return: - """ - import os - - return Path(os.path.realpath(__file__)).read_text() \ No newline at end of file +setup_dependencies("https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip", 'venv') +setup_dependencies("https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip", 'venv_static') \ No newline at end of file From cdd5dff4b0704f34c8009a3331768fbaf3a6e2b2 Mon Sep 17 00:00:00 2001 From: Pratichhya 
<39898768+Pratichhya@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:15:41 +0100 Subject: [PATCH 12/26] finalised changes with udp --- openeo_udp/fusets_mogpr/fusets_mogpr.json | 8 ++++---- openeo_udp/fusets_mogpr/generate.py | 2 +- openeo_udp/fusets_mogpr/set_path.py | 1 - 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json index 41a7dd98..2c491e96 100644 --- a/openeo_udp/fusets_mogpr/fusets_mogpr.json +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -4,7 +4,7 @@ "process_id": "apply_neighborhood", "arguments": { "data": { - "from_parameter": "input_raster_cube" + "from_parameter": "data" }, "overlap": [], "process": { @@ -17,7 +17,7 @@ "from_parameter": "data" }, "runtime": "Python", - "udf": "import os\nimport sys\nimport zipfile\nimport requests\nimport functools\nfrom typing import Union\nfrom pathlib import Path\n\nfrom openeo.udf import inspect\n\n\n# Example constants for demonstration\nDEPENDENCIES_DIR1 = 'venv'\nDEPENDENCIES_DIR2 = 'venv_static'\n\nDEPENDENCIES_URL1 = \"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\"\nDEPENDENCIES_URL2 = \"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip\"\n\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\n\ndef extract_zip(zip_path, extract_to):\n \"\"\"\n Extracts a zip file from zip_path to the specified extract_to directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(extract_to)\n os.remove(zip_path) # Clean up the zip file after extraction\n\n\ndef add_directory_to_sys_path(directory):\n \"\"\"\n Adds a directory to the Python sys.path if it's not already present.\n \"\"\"\n if directory not in sys.path:\n sys.path.insert(0, 
directory)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url,DEPENDENCIES_DIR):\n \"\"\"\n Main function to set up the dependencies by downloading, extracting,\n and adding necessary directories to sys.path.\n \"\"\"\n\n inspect(message=\"Create directories\")\n # Ensure base directories exist\n os.makedirs(DEPENDENCIES_DIR, exist_ok=True)\n\n # Download and extract dependencies if not already present\n if not os.listdir(DEPENDENCIES_DIR):\n\n inspect(message=\"Extract dependencies\")\n zip_path = os.path.join(DEPENDENCIES_DIR, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n extract_zip(zip_path, DEPENDENCIES_DIR)\n\n # Add the extracted dependencies directory to sys.path\n add_directory_to_sys_path(DEPENDENCIES_DIR)\n inspect(message=\"Added to the sys path\")\n\nsetup_dependencies(DEPENDENCIES_URL1, DEPENDENCIES_DIR1)\nsetup_dependencies(DEPENDENCIES_URL2, DEPENDENCIES_DIR2)\n\n\ndef load_set_path() -> str:\n \"\"\"\n loads path setup functions \n @return:\n \"\"\"\n import os\n\n return Path(os.path.realpath(__file__)).read_text()\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n 
return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" + "udf": "import os\nimport sys\nimport zipfile\nimport requests\nimport functools\nfrom pathlib import Path\n\nfrom openeo.udf import inspect\n\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\n\ndef extract_zip(zip_path, extract_to):\n \"\"\"\n Extracts a zip file from zip_path to the specified extract_to directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(extract_to)\n\n\ndef add_directory_to_sys_path(directory):\n \"\"\"\n Adds a directory to the Python sys.path if it's not already present.\n \"\"\"\n if directory not in sys.path:\n sys.path.append(directory)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url,DEPENDENCIES_DIR):\n \"\"\"\n Main function to set up the dependencies by downloading, extracting,\n and adding necessary directories to sys.path.\n \"\"\"\n\n inspect(message=\"Create directories\")\n # Ensure base directories exist\n os.makedirs(DEPENDENCIES_DIR, exist_ok=True)\n\n # Download and extract dependencies if not already present\n if not 
os.listdir(DEPENDENCIES_DIR):\n\n inspect(message=\"Extract dependencies\")\n zip_path = os.path.join(DEPENDENCIES_DIR, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n extract_zip(zip_path, DEPENDENCIES_DIR)\n os.remove(zip_path)\n\n # Add the extracted dependencies directory to sys.path\n add_directory_to_sys_path(DEPENDENCIES_DIR)\n inspect(message=\"Added to the sys path\")\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\", 'venv')\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip\", 'venv_static')\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = 
mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" }, "result": true } @@ -41,10 +41,10 @@ }, "id": "fusets_mogpr", "summary": "Integrates timeseries in data cube using multi-output gaussian process regression", - "description": "# Multi output gaussian process regression\n\n## Description\n\nCompute an integrated timeseries based on multiple inputs.\nFor instance, combine Sentinel-2 NDVI with Sentinel-1 RVI into one integrated NDVI.\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Configuration & Resource Usage\n\nRun configurations for different ROI/TOI with memory requirements and estimated run durations.\n\n### Synchronous calls\n\nTODO: Replace with actual measurements!!!\n\n| Spatial extent | Run duration |\n|----------------|--------------|\n| 100 m x 100 m | 1 minute |\n| 500m x 500 m | 1 minute |\n| 1 km x 1 km | 1 minute |\n| 5 km x 5 km | 2 minutes |\n| 10 km x 10 km | 3 minutes |\n| 50 km x 50 km | 9 minutes |\n\nThe maximum duration of a synchronous run is 15 minutes.\nFor long running computations, you can use batch jobs.\n\n### Batch jobs\n\nTODO: Replace with actual measurements!!!\n\n| Spatial extent | Temporal extent | Executor memory | Run duration |\n|-----------------|-----------------|-----------------|--------------|\n| 100 m x 100 m | 1 month | default | 7 minutes |\n| 500 m x 100 m | 1 month | default | 7 minutes |\n| 1 km x 1 km | 1 month | default | 7 minutes |\n| 5 km x 5 km | 1 month | default | 10 minutes |\n| 10 km x 10 km | 1 month | default | 11 minutes |\n| 50 km x 50 km | 1 month | 6 GB | 20 minutes |\n| 100 km x 100 km | 1 month | 7 GB | 34 minutes |\n| 100m x 100 m | 7 
months | default | 10 minutes |\n| 500 m x 500 m | 7 months | default | 10 minutes |\n| 1 km x 1 km | 7 months | default | 14 minutes |\n| 5 km x 5 km | 7 months | default | 14 minutes |\n| 10 km x 10 km | 7 months | default | 19 minutes |\n| 50 km x 50 km | 7 months | 6 GB | 45 minutes |\n| 100 km x 100 km | 7 months | 8 GB | 65 minutes |\n", + "description": "# Multi-output Gaussian process regression (MOGPR)\n\nThe MOGPR service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators.\n\n## Parameters\n\nThe MOGPR service requires the following parameters:\n\n- `datacube`: The input datacube that contains the data to be gap-filled.\n\n## Usage\n\nThe MOGPR service can be used as follows:\n\n```python\n\nimport openeo\n\n## Setup of parameters\nspat_ext = {\n    \"type\": \"Polygon\",\n    \"coordinates\": [\n [\n [\n                5.170012098271149,\n                51.25062964728295\n ],\n [\n                5.17085904378298,\n                51.24882567194015\n ],\n [\n                5.17857421368097,\n                51.2468515482926\n ],\n [\n                5.178972704726344,\n                51.24982704376254\n ],\n [\n                5.170012098271149,\n                51.25062964728295\n ]\n ]\n ]\n}\ntemp_ext = [\"2022-05-01\", \"2023-07-31\"]\n\n## Setup connection to openEO\neoconn = openeo.connect(\n        \"openeo.dataspace.copernicus.eu\"\n ).authenticate_oidc(\"CDSE\")\n\n## Create a base NDVI datacube that can be used as input for the service\nbase = eoconn.load_collection('SENTINEL2_L2A',\n                                  spatial_extent=spat_ext,\n                                  temporal_extent=temp_ext,\n                                  bands=[\"B04\", \"B08\", \"SCL\"])\nmask = scl.process(\"to_scl_dilation_mask\", data=scl)\nbase_cloudmasked 
= base.mask(mask)\nbase_ndvi = base_cloudmasked.ndvi(red=\"B04\", nir=\"B08\")\n\nprocess_id = \"fusets_mogpr\"\nnamespace_url = \"public_url\"    # publised URL of the process\n## Create a processing graph from the MOGPR process using an active openEO connection\nmogpr = eoconn.datacube_from_process(\n       process_id=process_id,\n       namespace= namespace_url,\n      data=base_ndvi, \n )\n\n\n## Calculate the average time series value for the given area of interest\nmogpr = mogpr.aggregate_spatial(spat_ext, reducer='mean')\n\n# Execute the service as a batch process\nmogpr_job = mogpr.execute_batch('./mogpr.json', out_format=\"json\", title=f'FuseTS - MOGPR') \n\n```\n\n## Output\n\nThe User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", "parameters": [ { - "name": "input_raster_cube", + "name": "data", "description": "Raster cube for which to calculate the peaks and valleys", "schema": { "type": "object", diff --git a/openeo_udp/fusets_mogpr/generate.py b/openeo_udp/fusets_mogpr/generate.py index a3e8b29e..2d4e1283 100644 --- a/openeo_udp/fusets_mogpr/generate.py +++ b/openeo_udp/fusets_mogpr/generate.py @@ -45,5 +45,5 @@ def generate() -> dict: if __name__ == "__main__": # save the generated process to a file - with open(Path(__file__).parent / "fusets_mogpr3.json", "w") as f: + with open(Path(__file__).parent / "fusets_mogpr.json", "w") as f: json.dump(generate(), f, indent=2) diff --git a/openeo_udp/fusets_mogpr/set_path.py b/openeo_udp/fusets_mogpr/set_path.py index c393ea58..59366cfa 100644 --- a/openeo_udp/fusets_mogpr/set_path.py +++ b/openeo_udp/fusets_mogpr/set_path.py @@ -3,7 +3,6 @@ import zipfile import requests import functools -from pathlib import Path from openeo.udf import inspect From 6d988a8cbce08384442af2975c72ceda8606a619 Mon Sep 17 00:00:00 2001 From: Pratichhya 
<39898768+Pratichhya@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:16:08 +0100 Subject: [PATCH 13/26] fusets_version --- qa/unittests/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qa/unittests/requirements.txt b/qa/unittests/requirements.txt index 09eb03cb..b1c0ca1f 100644 --- a/qa/unittests/requirements.txt +++ b/qa/unittests/requirements.txt @@ -3,4 +3,4 @@ git+https://github.com/ESA-APEx/esa-apex-toolbox-python.git@main pytest>=8.2.0 moto[s3, server]>=5.0.13 dirty-equals>=0.8.0 -fusets +fusets>=2.0.1 From 66f83f5628d699cfbc571205aa9840056be8e5e1 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:37:12 +0100 Subject: [PATCH 14/26] back to insert instead of append --- openeo_udp/fusets_mogpr/fusets_mogpr.json | 2 +- openeo_udp/fusets_mogpr/set_path.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json index 2c491e96..dbdd8378 100644 --- a/openeo_udp/fusets_mogpr/fusets_mogpr.json +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -17,7 +17,7 @@ "from_parameter": "data" }, "runtime": "Python", - "udf": "import os\nimport sys\nimport zipfile\nimport requests\nimport functools\nfrom pathlib import Path\n\nfrom openeo.udf import inspect\n\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\n\ndef extract_zip(zip_path, extract_to):\n \"\"\"\n Extracts a zip file from zip_path to the specified extract_to directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(extract_to)\n\n\ndef add_directory_to_sys_path(directory):\n \"\"\"\n Adds a directory to the Python sys.path if it's not already present.\n \"\"\"\n if directory not in sys.path:\n 
sys.path.append(directory)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url,DEPENDENCIES_DIR):\n \"\"\"\n Main function to set up the dependencies by downloading, extracting,\n and adding necessary directories to sys.path.\n \"\"\"\n\n inspect(message=\"Create directories\")\n # Ensure base directories exist\n os.makedirs(DEPENDENCIES_DIR, exist_ok=True)\n\n # Download and extract dependencies if not already present\n if not os.listdir(DEPENDENCIES_DIR):\n\n inspect(message=\"Extract dependencies\")\n zip_path = os.path.join(DEPENDENCIES_DIR, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n extract_zip(zip_path, DEPENDENCIES_DIR)\n os.remove(zip_path)\n\n # Add the extracted dependencies directory to sys.path\n add_directory_to_sys_path(DEPENDENCIES_DIR)\n inspect(message=\"Added to the sys path\")\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\", 'venv')\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip\", 'venv_static')\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n 
cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" + "udf": "import os\nimport sys\nimport zipfile\nimport requests\nimport functools\n\nfrom openeo.udf import inspect\n\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\n\ndef extract_zip(zip_path, extract_to):\n \"\"\"\n Extracts a zip file from zip_path to the specified extract_to directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(extract_to)\n\n\ndef add_directory_to_sys_path(directory):\n \"\"\"\n Adds a directory to the Python sys.path if it's not already present.\n \"\"\"\n if directory not in sys.path:\n sys.path.insert(0, directory)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url,DEPENDENCIES_DIR):\n \"\"\"\n Main function to set up the dependencies by downloading, extracting,\n and adding necessary directories to sys.path.\n \"\"\"\n\n inspect(message=\"Create directories\")\n # Ensure base directories exist\n os.makedirs(DEPENDENCIES_DIR, exist_ok=True)\n\n # Download and extract dependencies if not already present\n if not 
os.listdir(DEPENDENCIES_DIR):\n\n inspect(message=\"Extract dependencies\")\n zip_path = os.path.join(DEPENDENCIES_DIR, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n extract_zip(zip_path, DEPENDENCIES_DIR)\n os.remove(zip_path)\n\n # Add the extracted dependencies directory to sys.path\n add_directory_to_sys_path(DEPENDENCIES_DIR)\n inspect(message=\"Added to the sys path\")\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\", 'venv')\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip\", 'venv_static')\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = 
mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" }, "result": true } diff --git a/openeo_udp/fusets_mogpr/set_path.py b/openeo_udp/fusets_mogpr/set_path.py index 59366cfa..a7ecba4c 100644 --- a/openeo_udp/fusets_mogpr/set_path.py +++ b/openeo_udp/fusets_mogpr/set_path.py @@ -29,7 +29,7 @@ def add_directory_to_sys_path(directory): Adds a directory to the Python sys.path if it's not already present. """ if directory not in sys.path: - sys.path.append(directory) + sys.path.insert(0, directory) @functools.lru_cache(maxsize=5) def setup_dependencies(dependencies_url,DEPENDENCIES_DIR): From 3a455ff205d168102fb1716b65057004a8644fcf Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Tue, 14 Jan 2025 15:42:44 +0100 Subject: [PATCH 15/26] updated benchmark scenario and results --- benchmark_scenarios/fusets_mogpr.json | 35 ++++++++++++++++++--------- 1 file changed, 23 insertions(+), 12 deletions(-) diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json index 00792bd0..91f08296 100644 --- a/benchmark_scenarios/fusets_mogpr.json +++ b/benchmark_scenarios/fusets_mogpr.json @@ -8,7 +8,7 @@ "aggregatespatial1": { "arguments": { "data": { - "from_node": "mogpr1" + "from_node": "fusetsmogpr1" }, "geometries": { "coordinates": [ @@ -53,6 +53,15 @@ }, "process_id": "aggregate_spatial" }, + "fusetsmogpr1": { + "arguments": { + "data": { + "from_node": "ndvi1" + } + }, + "namespace": "https://openeo.dataspace.copernicus.eu/openeo/1.2/processes/u:3e24e251-2e9a-438f-90a9-d4500e576574/fusets_mogpr", + "process_id": "fusets_mogpr" + }, "loadcollection1": { "arguments": { "bands": [ @@ -89,7 +98,7 @@ }, "temporal_extent": [ 
"2022-05-01", - "2023-07-31" + "2022-06-01" ] }, "process_id": "load_collection" @@ -129,7 +138,7 @@ }, "temporal_extent": [ "2022-05-01", - "2023-07-31" + "2022-06-01" ] }, "process_id": "load_collection" @@ -145,15 +154,6 @@ }, "process_id": "mask" }, - "mogpr1": { - "arguments": { - "input_raster_cube": { - "from_node": "ndvi1" - } - }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json", - "process_id": "mogpr" - }, "ndvi1": { "arguments": { "data": { @@ -164,6 +164,17 @@ }, "process_id": "ndvi" }, + "saveresult1": { + "arguments": { + "data": { + "from_node": "aggregatespatial1" + }, + "format": "JSON", + "options": {} + }, + "process_id": "save_result", + "result": true + }, "toscldilationmask1": { "arguments": { "data": { From 73ee63941593a57c09dd8e9f046c0157ee8fb545 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 23 Jan 2025 10:20:52 +0100 Subject: [PATCH 16/26] updated udp to mogpe_s1s2 --- openeo_udp/fusets_mogpr/README.md | 92 +-- openeo_udp/fusets_mogpr/fusets_mogpr.json | 859 +++++++++++++++++++++- openeo_udp/fusets_mogpr/generate.py | 65 +- openeo_udp/fusets_mogpr/helpers.py | 213 ++++++ 4 files changed, 1140 insertions(+), 89 deletions(-) create mode 100644 openeo_udp/fusets_mogpr/helpers.py diff --git a/openeo_udp/fusets_mogpr/README.md b/openeo_udp/fusets_mogpr/README.md index be62efa1..209ff9b7 100644 --- a/openeo_udp/fusets_mogpr/README.md +++ b/openeo_udp/fusets_mogpr/README.md @@ -1,83 +1,39 @@ -# Multi-output Gaussian process regression (MOGPR) +# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR) -The MOGPR service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. 
It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. +This service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. This service focusses on the fusion of Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources. ## Parameters -The MOGPR service requires the following parameters: +The `fusets_mogpr_s1s2` service requires the following parameters: -- `datacube`: The input datacube that contains the data to be gap-filled. +| Name | Description | Type | Default | +|---|---|---|---------| +| polygon | Polygon representing the AOI on which to apply the data fusion | GeoJSON | | +| temporal_extent | Date range for which to apply the data fusion | Array | | +| s1_collection | S1 data collection to use for the fusion | Text | RVI | +| s2_collection | S2 data collection to use for fusing the data | Text | NDVI | -## Usage +## Supported collections -The MOGPR service can be used as follows: +#### Sentinel-1 -```python +* RVI +* GRD -import openeo +#### Sentinel-2 -## Setup of parameters -spat_ext = { -    "type": "Polygon", -    "coordinates": [ - [ - [ -                5.170012098271149, -                51.25062964728295 - ], - [ -                5.17085904378298, -                51.24882567194015 - ], - [ -                5.17857421368097, -                51.2468515482926 - ], - [ -                5.178972704726344, -                51.24982704376254 - ], - [ -                5.170012098271149, -                51.25062964728295 - ] - ] - ] -} -temp_ext = ["2022-05-01", "2023-07-31"] +* NDVI +* FAPAR +* LAI +* FCOVER +* EVI +* CCC +* CWC -## Setup connection to openEO -eoconn = openeo.connect( -        "openeo.dataspace.copernicus.eu" - 
).authenticate_oidc("CDSE") +## Limitations -## Create a base NDVI datacube that can be used as input for the service -base = eoconn.load_collection('SENTINEL2_L2A', -                                  spatial_extent=spat_ext, -                                  temporal_extent=temp_ext, -                                  bands=["B04", "B08", "SCL"]) -mask = scl.process("to_scl_dilation_mask", data=scl) -base_cloudmasked = base.mask(mask) -base_ndvi = base_cloudmasked.ndvi(red="B04", nir="B08") - -process_id = "fusets_mogpr" -namespace_url = "public_url"    # publised URL of the process -## Create a processing graph from the MOGPR process using an active openEO connection -mogpr = eoconn.datacube_from_process( -       process_id=process_id, -       namespace= namespace_url, -      data=base_ndvi, - ) - - -## Calculate the average time series value for the given area of interest -mogpr = mogpr.aggregate_spatial(spat_ext, reducer='mean') - -# Execute the service as a batch process -mogpr_job = mogpr.execute_batch('./mogpr.json', out_format="json", title=f'FuseTS - MOGPR') - -``` +The spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km). ## Output -The User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes. \ No newline at end of file +This User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes. 
\ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json index dbdd8378..88332e6e 100644 --- a/openeo_udp/fusets_mogpr/fusets_mogpr.json +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -1,10 +1,717 @@ { "process_graph": { + "biopar1": { + "process_id": "biopar", + "arguments": { + "biopar_type": "CWC", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar2": { + "process_id": "biopar", + "arguments": { + "biopar_type": "CCC", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "B02", + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "loadcollection2": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "toscldilationmask1": { + "process_id": "to_scl_dilation_mask", + "arguments": { + "data": { + "from_node": "loadcollection2" + } + } + }, + "mask1": { + "process_id": "mask", + "arguments": { + "data": { + "from_node": "loadcollection1" + }, + "mask": { + "from_node": "toscldilationmask1" + } + } + }, + "reducedimension1": { + "process_id": "reduce_dimension", + "arguments": { + "data": { + "from_node": "mask1" + }, + "dimension": "bands", + 
"reducer": { + "process_graph": { + "arrayelement1": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 2 + } + }, + "arrayelement2": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 1 + } + }, + "subtract1": { + "process_id": "subtract", + "arguments": { + "x": { + "from_node": "arrayelement1" + }, + "y": { + "from_node": "arrayelement2" + } + } + }, + "multiply1": { + "process_id": "multiply", + "arguments": { + "x": 2.5, + "y": { + "from_node": "subtract1" + } + } + }, + "multiply2": { + "process_id": "multiply", + "arguments": { + "x": 6, + "y": { + "from_node": "arrayelement2" + } + } + }, + "add1": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement1" + }, + "y": { + "from_node": "multiply2" + } + } + }, + "arrayelement3": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 1 + } + }, + "multiply3": { + "process_id": "multiply", + "arguments": { + "x": 7.5, + "y": { + "from_node": "arrayelement3" + } + } + }, + "subtract2": { + "process_id": "subtract", + "arguments": { + "x": { + "from_node": "add1" + }, + "y": { + "from_node": "multiply3" + } + } + }, + "add2": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "subtract2" + }, + "y": 1 + } + }, + "divide1": { + "process_id": "divide", + "arguments": { + "x": { + "from_node": "multiply1" + }, + "y": { + "from_node": "add2" + } + }, + "result": true + } + } + } + } + }, + "adddimension1": { + "process_id": "add_dimension", + "arguments": { + "data": { + "from_node": "reducedimension1" + }, + "label": "EVI", + "name": "bands", + "type": "bands" + } + }, + "biopar3": { + "process_id": "biopar", + "arguments": { + "biopar_type": "FCOVER", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": 
"https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar4": { + "process_id": "biopar", + "arguments": { + "biopar_type": "LAI", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar5": { + "process_id": "biopar", + "arguments": { + "biopar_type": "FAPAR", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "loadcollection3": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "loadcollection4": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "toscldilationmask2": { + "process_id": "to_scl_dilation_mask", + "arguments": { + "data": { + "from_node": "loadcollection4" + } + } + }, + "mask2": { + "process_id": "mask", + "arguments": { + "data": { + "from_node": "loadcollection3" + }, + "mask": { + "from_node": "toscldilationmask2" + } + } + }, + "ndvi1": { + "process_id": "ndvi", + "arguments": { + "data": { + "from_node": "mask2" + }, + "nir": "B08", + "red": "B04", + "target_band": "NDVI" + } + }, + "filterbands1": { + "process_id": "filter_bands", + "arguments": { + "bands": [ + "NDVI" + ], + "data": { + "from_node": "ndvi1" + } + } + }, + "eq1": { + 
"process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "ndvi" + } + }, + "if1": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "filterbands1" + }, + "reject": null, + "value": { + "from_node": "eq1" + } + } + }, + "eq2": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "fapar" + } + }, + "if2": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar5" + }, + "reject": { + "from_node": "if1" + }, + "value": { + "from_node": "eq2" + } + } + }, + "eq3": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "lai" + } + }, + "if3": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar4" + }, + "reject": { + "from_node": "if2" + }, + "value": { + "from_node": "eq3" + } + } + }, + "eq4": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "fcover" + } + }, + "if4": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar3" + }, + "reject": { + "from_node": "if3" + }, + "value": { + "from_node": "eq4" + } + } + }, + "eq5": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "evi" + } + }, + "if5": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "adddimension1" + }, + "reject": { + "from_node": "if4" + }, + "value": { + "from_node": "eq5" + } + } + }, + "eq6": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "ccc" + } + }, + "if6": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar2" + }, + "reject": { + "from_node": "if5" + }, + "value": { + "from_node": "eq6" + } + } + }, + "eq7": { + "process_id": "eq", + 
"arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "cwc" + } + }, + "if7": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar1" + }, + "reject": { + "from_node": "if6" + }, + "value": { + "from_node": "eq7" + } + } + }, + "loadcollection5": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "VV", + "VH" + ], + "id": "SENTINEL1_GRD", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "sarbackscatter1": { + "process_id": "sar_backscatter", + "arguments": { + "coefficient": "sigma0-ellipsoid", + "contributing_area": false, + "data": { + "from_node": "loadcollection5" + }, + "elevation_model": null, + "ellipsoid_incidence_angle": false, + "local_incidence_angle": false, + "mask": false, + "noise_removal": true + } + }, + "renamelabels1": { + "process_id": "rename_labels", + "arguments": { + "data": { + "from_node": "sarbackscatter1" + }, + "dimension": "bands", + "target": [ + "VV", + "VH" + ] + } + }, + "reducedimension2": { + "process_id": "reduce_dimension", + "arguments": { + "data": { + "from_node": "renamelabels1" + }, + "dimension": "bands", + "reducer": { + "process_graph": { + "arrayelement4": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 1 + } + }, + "add3": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement4" + }, + "y": { + "from_node": "arrayelement4" + } + } + }, + "arrayelement5": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 0 + } + }, + "add4": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement5" + }, + "y": { + "from_node": "arrayelement4" + } + } + }, + "divide2": { + "process_id": "divide", + "arguments": { + "x": { + "from_node": "add3" + }, + "y": { + "from_node": "add4" + } + }, + "result": 
true + } + } + } + } + }, + "adddimension2": { + "process_id": "add_dimension", + "arguments": { + "data": { + "from_node": "reducedimension2" + }, + "label": "RVI", + "name": "bands", + "type": "bands" + } + }, + "loadcollection6": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "VV", + "VH" + ], + "id": "SENTINEL1_GRD", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "sarbackscatter2": { + "process_id": "sar_backscatter", + "arguments": { + "coefficient": "sigma0-ellipsoid", + "contributing_area": false, + "data": { + "from_node": "loadcollection6" + }, + "elevation_model": null, + "ellipsoid_incidence_angle": false, + "local_incidence_angle": false, + "mask": false, + "noise_removal": true + } + }, + "renamelabels2": { + "process_id": "rename_labels", + "arguments": { + "data": { + "from_node": "sarbackscatter2" + }, + "dimension": "bands", + "target": [ + "VV", + "VH" + ] + } + }, + "eq8": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s1_collection" + }, + "y": "grd" + } + }, + "if8": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "renamelabels2" + }, + "reject": null, + "value": { + "from_node": "eq8" + } + } + }, + "eq9": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s1_collection" + }, + "y": "rvi" + } + }, + "if9": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "adddimension2" + }, + "reject": { + "from_node": "if8" + }, + "value": { + "from_node": "eq9" + } + } + }, + "mergecubes1": { + "process_id": "merge_cubes", + "arguments": { + "cube1": { + "from_node": "if7" + }, + "cube2": { + "from_node": "if9" + } + } + }, "applyneighborhood1": { "process_id": "apply_neighborhood", "arguments": { "data": { - "from_parameter": "data" + "from_node": "mergecubes1" }, "overlap": [], "process": { @@ -40,16 
+747,154 @@ } }, "id": "fusets_mogpr", - "summary": "Integrates timeseries in data cube using multi-output gaussian process regression", - "description": "# Multi-output Gaussian process regression (MOGPR)\n\nThe MOGPR service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators.\n\n## Parameters\n\nThe MOGPR service requires the following parameters:\n\n- `datacube`: The input datacube that contains the data to be gap-filled.\n\n## Usage\n\nThe MOGPR service can be used as follows:\n\n```python\n\nimport openeo\n\n## Setup of parameters\nspat_ext = {\n    \"type\": \"Polygon\",\n    \"coordinates\": [\n [\n [\n                5.170012098271149,\n                51.25062964728295\n ],\n [\n                5.17085904378298,\n                51.24882567194015\n ],\n [\n                5.17857421368097,\n                51.2468515482926\n ],\n [\n                5.178972704726344,\n                51.24982704376254\n ],\n [\n                5.170012098271149,\n                51.25062964728295\n ]\n ]\n ]\n}\ntemp_ext = [\"2022-05-01\", \"2023-07-31\"]\n\n## Setup connection to openEO\neoconn = openeo.connect(\n        \"openeo.dataspace.copernicus.eu\"\n ).authenticate_oidc(\"CDSE\")\n\n## Create a base NDVI datacube that can be used as input for the service\nbase = eoconn.load_collection('SENTINEL2_L2A',\n                                  spatial_extent=spat_ext,\n                                  temporal_extent=temp_ext,\n                                  bands=[\"B04\", \"B08\", \"SCL\"])\nmask = scl.process(\"to_scl_dilation_mask\", data=scl)\nbase_cloudmasked = base.mask(mask)\nbase_ndvi = base_cloudmasked.ndvi(red=\"B04\", nir=\"B08\")\n\nprocess_id = \"fusets_mogpr\"\nnamespace_url = \"public_url\"    # publised URL of the process\n## Create a processing graph 
from the MOGPR process using an active openEO connection\nmogpr = eoconn.datacube_from_process(\n       process_id=process_id,\n       namespace= namespace_url,\n      data=base_ndvi, \n )\n\n\n## Calculate the average time series value for the given area of interest\nmogpr = mogpr.aggregate_spatial(spat_ext, reducer='mean')\n\n# Execute the service as a batch process\nmogpr_job = mogpr.execute_batch('./mogpr.json', out_format=\"json\", title=f'FuseTS - MOGPR') \n\n```\n\n## Output\n\nThe User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", + "summary": "Integrate S1 and S2 timeseries using multi-output gaussian process regression", + "description": "# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR)\n\nThis service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. 
This service focusses on the fusion of Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources.\n\n## Parameters\n\nThe `fusets_mogpr_s1s2` service requires the following parameters:\n\n| Name | Description | Type | Default |\n|---|---|---|---------|\n| polygon | Polygon representing the AOI on which to apply the data fusion | GeoJSON | | \n| temporal_extent | Date range for which to apply the data fusion | Array | |\n| s1_collection | S1 data collection to use for the fusion | Text | RVI |\n| s2_collection | S2 data collection to use for fusing the data | Text | NDVI | \n\n## Supported collections\n\n#### Sentinel-1\n\n* RVI\n* GRD\n\n#### Sentinel-2\n\n* NDVI\n* FAPAR\n* LAI\n* FCOVER\n* EVI\n* CCC\n* CWC\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Output\n\nThis User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", "parameters": [ { - "name": "data", - "description": "Raster cube for which to calculate the peaks and valleys", + "name": "spatial_extent", + "description": "Limits the data to process to the specified bounding box or polygons.\\n\\nFor raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\\nFor vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). 
Empty geometries may only be in the data cube if no spatial extent has been provided.\\n\\nEmpty geometries are ignored.\\nSet this parameter to null to set no limit for the spatial extent.", + "schema": [ + { + "title": "Bounding Box", + "type": "object", + "subtype": "bounding-box", + "required": [ + "west", + "south", + "east", + "north" + ], + "properties": { + "west": { + "description": "West (lower left corner, coordinate axis 1).", + "type": "number" + }, + "south": { + "description": "South (lower left corner, coordinate axis 2).", + "type": "number" + }, + "east": { + "description": "East (upper right corner, coordinate axis 1).", + "type": "number" + }, + "north": { + "description": "North (upper right corner, coordinate axis 2).", + "type": "number" + }, + "base": { + "description": "Base (optional, lower left corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + }, + "height": { + "description": "Height (optional, upper right corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + }, + "crs": { + "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", + "anyOf": [ + { + "title": "EPSG Code", + "type": "integer", + "subtype": "epsg-code", + "minimum": 1000, + "examples": [ + 3857 + ] + }, + { + "title": "WKT2", + "type": "string", + "subtype": "wkt2-definition" + } + ], + "default": 4326 + } + } + }, + { + "title": "Vector data cube", + "description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). 
Empty geometries are ignored.", + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + }, + { + "title": "No filter", + "description": "Don't filter spatially. All data is included in the data cube.", + "type": "null" + } + ] + }, + { + "name": "temporal_extent", + "description": "Temporal extent specified as two-element array with start and end date/date-time. \nThis is date range for which to apply the data fusion", "schema": { - "type": "object", - "subtype": "datacube" + "type": "array", + "subtype": "temporal-interval", + "uniqueItems": true, + "minItems": 2, + "maxItems": 2, + "items": { + "anyOf": [ + { + "type": "string", + "subtype": "date-time", + "format": "date-time" + }, + { + "type": "string", + "subtype": "date", + "format": "date" + }, + { + "type": "null" + } + ] + } } + }, + { + "name": "s1_collection", + "description": "S1 data collection to use for fusing the data", + "schema": { + "type": "string", + "enum": [ + "RVI", + "GRD" + ] + }, + "default": "RVI", + "optional": true + }, + { + "name": "s2_collection", + "description": "S2 data collection to use for fusing the data", + "schema": { + "type": "string", + "enum": [ + "NDVI", + "FAPAR", + "LAI", + "FCOVER", + "EVI", + "CCC", + "CWC" + ] + }, + "default": "NDVI", + "optional": true } ] } \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/generate.py b/openeo_udp/fusets_mogpr/generate.py index 2d4e1283..5fba4e02 100644 --- a/openeo_udp/fusets_mogpr/generate.py +++ b/openeo_udp/fusets_mogpr/generate.py @@ -1,19 +1,32 @@ import json from pathlib import Path -from typing import Union +from typing import Union, Sequence -from openeo import DataCube +import openeo from openeo.api.process import Parameter from openeo.processes import ProcessBuilder, apply_neighborhood from openeo.rest.udp import build_process_dict from fusets.openeo import load_mogpr_udf +from helpers import load_s1_collection, load_s2_collection -def get_mogpr( - input_cube: 
Union[DataCube, Parameter], + +connection = openeo.connect("openeofed.dataspace.copernicus.eu") + +def get_mogpr_s1_s2( + polygon: Union[Parameter, dict] = None, + date: Union[Sequence[str], Parameter] = None, + s1_collection: Union[str, Parameter] = None, + s2_collection: Union[str, Parameter] = None, ) -> ProcessBuilder: - return apply_neighborhood(input_cube, + s1_input_cube = load_s1_collection(connection, s1_collection, polygon, date) + s2_input_cube = load_s2_collection(connection, s2_collection, polygon, date) + + # Merge the inputs to a single datacube + merged_cube = s2_input_cube.merge_cubes(s1_input_cube) + + return apply_neighborhood(merged_cube, lambda data: data.run_udf(udf=Path("set_path.py").read_text()+"\n"+load_mogpr_udf(), runtime='Python', context=dict()), size=[ {'dimension': 'x', 'value': 32, 'unit': 'px'}, @@ -22,24 +35,48 @@ def get_mogpr( def generate() -> dict: + # define parameters - input_cube = Parameter.datacube( - name="data", - description="Raster cube for which to calculate the peaks and valleys" + polygon = Parameter.spatial_extent( + name="spatial_extent", + description="Limits the data to process to the specified bounding box or polygons.\\n\\nFor raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\\nFor vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.\\n\\nEmpty geometries are ignored.\\nSet this parameter to null to set no limit for the spatial extent." + ) + date = Parameter.temporal_interval( + name="temporal_extent", + description="Temporal extent specified as two-element array with start and end date/date-time. 
\nThis is date range for which to apply the data fusion" + ) + s1_collection = Parameter.string( + name="s1_collection", + description="S1 data collection to use for fusing the data", + default='RVI', + values=['RVI', 'GRD'] + ) + s2_collection = Parameter.string( + name="s2_collection", + description="S2 data collection to use for fusing the data", + default='NDVI', + values=['NDVI', 'FAPAR', 'LAI', 'FCOVER', 'EVI', 'CCC', 'CWC'] ) - mogpr = get_mogpr( - input_cube=input_cube, + + mogpr = get_mogpr_s1_s2( + polygon=polygon, + date=date, + s1_collection=s1_collection, + s2_collection=s2_collection ) return build_process_dict( process_graph=mogpr, process_id="fusets_mogpr", - summary="Integrates timeseries in data cube using multi-output gaussian process regression", + summary="Integrate S1 and S2 timeseries using multi-output gaussian process regression", description=(Path(__file__).parent / "README.md").read_text(), - parameters=[input_cube], - returns=None, # TODO - categories=None, # TODO + parameters=[ + polygon, + date, + s1_collection, + s2_collection + ], ) diff --git a/openeo_udp/fusets_mogpr/helpers.py b/openeo_udp/fusets_mogpr/helpers.py new file mode 100644 index 00000000..0ac4875e --- /dev/null +++ b/openeo_udp/fusets_mogpr/helpers.py @@ -0,0 +1,213 @@ +####################################################################################################################### +# S1 collection implementation +####################################################################################################################### +from openeo.processes import process, if_, eq + + +def _load_s1_grd_bands(connection, polygon, date, bands): + """ + Create an S1 datacube containing a selected set of bands from the SENTINEL1_GRD data collection. 
+
+    :param connection: openEO connection
+    :param polygon: Area of interest
+    :param date: Time of interest
+    :param bands: Bands to load
+    :return:
+    """
+    s1_grd = connection.load_collection('SENTINEL1_GRD',
+                                        spatial_extent=polygon,
+                                        temporal_extent=date,
+                                        bands=bands)
+    s1_grd = s1_grd.sar_backscatter(coefficient='sigma0-ellipsoid')
+    s1_grd = s1_grd.rename_labels(dimension="bands", target=bands)
+    return s1_grd
+
+
+def _load_rvi(connection, polygon, date):
+    """
+    Create an RVI datacube based on the S1 VV and VH bands.
+    :param connection: openEO connection
+    :param polygon: Area of interest
+    :param date: Time of interest
+    :return:
+    """
+    base_s1 = _load_s1_grd_bands(connection, polygon, date, ['VV', 'VH'])
+
+    VH = base_s1.band('VH')
+    VV = base_s1.band('VV')
+    rvi = (4 * VH) / (VV + VH)  # dual-pol RVI = 4*VH/(VV+VH); was (VH + VH), i.e. 2*VH (half the index)
+    return rvi.add_dimension(name="bands", label="RVI", type="bands")
+
+
+#######################################################################################################################
+# S2 collection implementation
+#######################################################################################################################
+
+def _load_ndvi(connection, polygon, date):
+    """
+    Create an NDVI datacube based on the SENTINEL2_L2A data collection.
+    :param connection: openEO connection
+    :param polygon: Area of interest
+    :param date: Time of interest
+    :return:
+    """
+    base_s2 = connection.load_collection('SENTINEL2_L2A',
+                                         spatial_extent=polygon,
+                                         temporal_extent=date,
+                                         bands=["B04", "B08"])
+    scl = connection.load_collection('SENTINEL2_L2A',
+                                     spatial_extent=polygon,
+                                     temporal_extent=date,
+                                     bands=["SCL"])
+    mask = scl.process("to_scl_dilation_mask", data=scl)
+    masked_s2 = base_s2.mask(mask)
+    ndvi = masked_s2.ndvi(red="B04", nir="B08", target_band='NDVI')
+    ndvi_filtered = ndvi.filter_bands(bands=['NDVI'])
+    return ndvi_filtered
+
+
+def _load_biopar(polygon, date, biopar):
+    """
+    Create a BIOPAR datacube. 
This is done by using the existing BIOPAR service:
+    https://portal.terrascope.be/catalogue/app-details/21
+
+    :param polygon: Area of interest
+    :param date: Time of interest
+    :param biopar: BIOPAR type (see documentation of service on portal)
+    :return:
+    """
+    base_biopar = process(
+        process_id="biopar",
+        namespace="https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json",
+        date=date,
+        polygon=polygon,
+        biopar_type=biopar
+    )
+    return base_biopar
+
+
+def _load_evi(connection, polygon, date):
+    """
+    Create an EVI datacube. More information is available at https://en.wikipedia.org/wiki/Enhanced_vegetation_index
+    :param connection: openEO connection
+    :param polygon: Area of interest
+    :param date: Time of interest
+    :return:
+    """
+    base_s2 = connection.load_collection(
+        collection_id='SENTINEL2_L2A',
+        spatial_extent=polygon,
+        temporal_extent=date,
+        bands=['B02', 'B04', 'B08'],
+    )
+    scl = connection.load_collection('SENTINEL2_L2A',
+                                     spatial_extent=polygon,
+                                     temporal_extent=date,
+                                     bands=["SCL"])
+    mask = scl.process("to_scl_dilation_mask", data=scl)
+    masked_s2 = base_s2.mask(mask)
+
+    B02 = masked_s2.band('B02')  # blue band; was band('B04') (red), which corrupted the EVI blue term
+    B04 = masked_s2.band('B04')
+    B08 = masked_s2.band('B08')
+
+    evi = (2.5 * (B08 - B04)) / ((B08 + 6.0 * B04 - 7.5 * B02) + 1.0)
+    return evi.add_dimension(name="bands", label="EVI", type="bands")
+
+
+#######################################################################################################################
+# OpenEO UDP implementation
+#######################################################################################################################
+def _build_collection_graph(collection, label, callable, reject):
+    """
+    Helper function that will construct an if-else structure using the if_ openEO process. If the value of the
+    collection parameter matches with the given label, the callable is executed. 
If not the reject function is
+    executed.
+
+    :param collection: openEO collection parameter
+    :param label: String representing the text with which the collection should match
+    :param callable: Function that is executed when the collection matches the label
+    :param reject: Function that is executed when the collection does not match the label
+    :return:
+    """
+    return if_(eq(collection, label, case_sensitive=False), callable, reject)
+
+
+def load_s1_collection(connection, collection, polygon, date):
+    """
+    Create a S1 input data cube based on the collection selected by the user. This is achieved by building an
+    if-else structure through the different openEO processes, making sure that the correct datacube is selected
+    when executing the UDP.
+
+    :param connection: openEO connection
+    :param collection: One of the supported collections (S1_COLLECTIONS)
+    :param polygon: Area of interest
+    :param date: Time of interest
+    :return:
+    """
+    collections = None
+    for option in [
+        {
+            'label': 'grd',
+            'function': _load_s1_grd_bands(connection=connection, polygon=polygon, date=date, bands=['VV', 'VH'])
+        },
+        {
+            'label': 'rvi',
+            'function': _load_rvi(connection=connection, polygon=polygon, date=date)
+        }
+    ]:
+        collections = _build_collection_graph(collection=collection,
+                                              label=option['label'],
+                                              callable=option['function'],
+                                              reject=collections)
+    return collections
+
+
+def load_s2_collection(connection, collection, polygon, date):
+    """
+    Create a S2 input data cube based on the collection selected by the user. This is achieved by building an
+    if-else structure through the different openEO processes, making sure that the correct datacube is selected
+    when executing the UDP.
+ + :param connection: openEO connection + :param collection: One of the supported collection (S2_COLLECTIONS) + :param polygon: Area of interest + :param date: Time of interest + :return: + """ + collections = None + for option in [ + { + 'label': 'ndvi', + 'function': _load_ndvi(connection=connection, polygon=polygon, date=date) + + }, + { + 'label': 'fapar', + 'function': _load_biopar(polygon=polygon, date=date, biopar='FAPAR') + }, + { + 'label': 'lai', + 'function': _load_biopar(polygon=polygon, date=date, biopar='LAI') + }, + { + 'label': 'fcover', + 'function': _load_biopar(polygon=polygon, date=date, biopar='FCOVER') + }, + { + 'label': 'evi', + 'function': _load_evi(connection=connection, polygon=polygon, date=date) + }, + { + 'label': 'ccc', + 'function': _load_biopar(polygon=polygon, date=date, biopar='CCC') + }, + { + 'label': 'cwc', + 'function': _load_biopar(polygon=polygon, date=date, biopar='CWC') + } + ]: + collections = _build_collection_graph(collection=collection, + label=option['label'], + callable=option['function'], + reject=collections) + return collections \ No newline at end of file From 8f126800da4f7bc3c09d228ec3b2f5868e83487c Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 23 Jan 2025 10:29:11 +0100 Subject: [PATCH 17/26] updated benchmark scenario --- benchmark_scenarios/fusets_mogpr.json | 223 ++++++-------------------- 1 file changed, 48 insertions(+), 175 deletions(-) diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json index 91f08296..1b494d73 100644 --- a/benchmark_scenarios/fusets_mogpr.json +++ b/benchmark_scenarios/fusets_mogpr.json @@ -5,188 +5,61 @@ "description": "Multi output gaussian process regression example on NDVI timeseries", "backend": "openeofed.dataspace.copernicus.eu", "process_graph": { - "aggregatespatial1": { - "arguments": { - "data": { - "from_node": "fusetsmogpr1" - }, - "geometries": { - "coordinates": [ - [ - [ - 
5.170012098271149, - 51.25062964728295 - ], - [ - 5.17085904378298, - 51.24882567194015 - ], - [ - 5.17857421368097, - 51.2468515482926 - ], - [ - 5.178972704726344, - 51.24982704376254 - ], - [ - 5.170012098271149, - 51.25062964728295 - ] - ] - ], - "type": "Polygon" - }, - "reducer": { - "process_graph": { - "mean1": { - "arguments": { - "data": { - "from_parameter": "data" - } - }, - "process_id": "mean", - "result": true - } - } - } - }, - "process_id": "aggregate_spatial" - }, - "fusetsmogpr1": { - "arguments": { - "data": { - "from_node": "ndvi1" - } - }, - "namespace": "https://openeo.dataspace.copernicus.eu/openeo/1.2/processes/u:3e24e251-2e9a-438f-90a9-d4500e576574/fusets_mogpr", - "process_id": "fusets_mogpr" - }, - "loadcollection1": { - "arguments": { - "bands": [ - "B04", - "B08" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "coordinates": [ - [ - [ - 5.170012098271149, - 51.25062964728295 - ], - [ - 5.17085904378298, - 51.24882567194015 - ], - [ - 5.17857421368097, - 51.2468515482926 - ], - [ - 5.178972704726344, - 51.24982704376254 - ], - [ - 5.170012098271149, - 51.25062964728295 - ] - ] - ], - "type": "Polygon" - }, - "temporal_extent": [ - "2022-05-01", - "2022-06-01" - ] - }, - "process_id": "load_collection" - }, - "loadcollection2": { - "arguments": { - "bands": [ - "SCL" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "coordinates": [ - [ - [ - 5.170012098271149, - 51.25062964728295 - ], - [ - 5.17085904378298, - 51.24882567194015 - ], - [ - 5.17857421368097, - 51.2468515482926 - ], - [ - 5.178972704726344, - 51.24982704376254 - ], - [ - 5.170012098271149, - 51.25062964728295 - ] - ] - ], - "type": "Polygon" - }, - "temporal_extent": [ - "2022-05-01", - "2022-06-01" + "mogprs1s21601251": { + "arguments": { + "include_raw_inputs": true, + "include_uncertainties": true, + "s1_collection": "RVI", + "s2_collection": "NDVI", + "spatial_extent": { + "coordinates": [ + [ + [ + 5.170012098271149, + 51.25062964728295 + ], + [ + 
5.17085904378298, + 51.24882567194015 + ], + [ + 5.17857421368097, + 51.2468515482926 + ], + [ + 5.178972704726344, + 51.24982704376254 + ], + [ + 5.170012098271149, + 51.25062964728295 + ] ] + ], + "type": "Polygon" }, - "process_id": "load_collection" - }, - "mask1": { - "arguments": { - "data": { - "from_node": "loadcollection1" - }, - "mask": { - "from_node": "toscldilationmask1" - } - }, - "process_id": "mask" - }, - "ndvi1": { - "arguments": { - "data": { - "from_node": "mask1" - }, - "nir": "B08", - "red": "B04" - }, - "process_id": "ndvi" + "temporal_extent": [ + "2023-01-01", + "2023-12-31" + ] + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json", + "process_id": "fusets_mogpr" }, "saveresult1": { - "arguments": { - "data": { - "from_node": "aggregatespatial1" - }, - "format": "JSON", - "options": {} - }, - "process_id": "save_result", - "result": true - }, - "toscldilationmask1": { - "arguments": { - "data": { - "from_node": "loadcollection2" - } + "arguments": { + "data": { + "from_node": "mogprs1s21601251" }, - "process_id": "to_scl_dilation_mask" + "format": "netCDF", + "options": {} + }, + "process_id": "save_result", + "result": true } - }, + }, "reference_data": { - "job-results.json": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/job-results.json", - "timeseries.json": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.json" + "timeseries.nc": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.nc" } } ] \ No newline at end of file From f0becbe3cc263f5a71ff767ce8568982bb8aede7 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 23 Jan 2025 10:29:50 +0100 Subject: [PATCH 18/26] output format --- algorithm_catalog/fusets_mogpr.json | 2 +- benchmark_scenarios/fusets_mogpr.json | 92 +++++++++++++-------------- 2 files changed, 47 
insertions(+), 47 deletions(-) diff --git a/algorithm_catalog/fusets_mogpr.json b/algorithm_catalog/fusets_mogpr.json index 0c4ecc41..2c5a053b 100644 --- a/algorithm_catalog/fusets_mogpr.json +++ b/algorithm_catalog/fusets_mogpr.json @@ -130,7 +130,7 @@ "rel": "example", "type": "application/json", "title": "Example output", - "href": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.json" + "href": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.nc" } ] } \ No newline at end of file diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json index 1b494d73..b38247ba 100644 --- a/benchmark_scenarios/fusets_mogpr.json +++ b/benchmark_scenarios/fusets_mogpr.json @@ -6,58 +6,58 @@ "backend": "openeofed.dataspace.copernicus.eu", "process_graph": { "mogprs1s21601251": { - "arguments": { - "include_raw_inputs": true, - "include_uncertainties": true, - "s1_collection": "RVI", - "s2_collection": "NDVI", - "spatial_extent": { - "coordinates": [ - [ - [ - 5.170012098271149, - 51.25062964728295 - ], - [ - 5.17085904378298, - 51.24882567194015 - ], - [ - 5.17857421368097, - 51.2468515482926 - ], - [ - 5.178972704726344, - 51.24982704376254 - ], - [ - 5.170012098271149, - 51.25062964728295 - ] + "arguments": { + "include_raw_inputs": true, + "include_uncertainties": true, + "s1_collection": "RVI", + "s2_collection": "NDVI", + "spatial_extent": { + "coordinates": [ + [ + [ + 5.170012098271149, + 51.25062964728295 + ], + [ + 5.17085904378298, + 51.24882567194015 + ], + [ + 5.17857421368097, + 51.2468515482926 + ], + [ + 5.178972704726344, + 51.24982704376254 + ], + [ + 5.170012098271149, + 51.25062964728295 + ] + ] + ], + "type": "Polygon" + }, + "temporal_extent": [ + "2023-01-01", + "2023-12-31" ] - ], - "type": "Polygon" }, - "temporal_extent": [ - "2023-01-01", - "2023-12-31" - ] - }, - "namespace": 
"https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json", - "process_id": "fusets_mogpr" + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json", + "process_id": "fusets_mogpr" }, "saveresult1": { - "arguments": { - "data": { - "from_node": "mogprs1s21601251" + "arguments": { + "data": { + "from_node": "mogprs1s21601251" + }, + "format": "netCDF", + "options": {} }, - "format": "netCDF", - "options": {} - }, - "process_id": "save_result", - "result": true + "process_id": "save_result", + "result": true } - }, + }, "reference_data": { "timeseries.nc": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.nc" } From e65120de9419eab453edb97e8398d6c88e171c64 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Wed, 29 Jan 2025 10:03:25 +0100 Subject: [PATCH 19/26] solution for sys path in UDF --- openeo_udp/fusets_mogpr/README.md | 20 ++++-- openeo_udp/fusets_mogpr/fusets_mogpr.json | 4 +- openeo_udp/fusets_mogpr/set_path.py | 87 ++++++++++++++++------- 3 files changed, 77 insertions(+), 34 deletions(-) diff --git a/openeo_udp/fusets_mogpr/README.md b/openeo_udp/fusets_mogpr/README.md index 209ff9b7..1206e0e0 100644 --- a/openeo_udp/fusets_mogpr/README.md +++ b/openeo_udp/fusets_mogpr/README.md @@ -1,6 +1,6 @@ # Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR) -This service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. This service focusses on the fusion of Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources. 
+This service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. This service focuses on fusing Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources. ## Parameters @@ -8,10 +8,10 @@ The `fusets_mogpr_s1s2` service requires the following parameters: | Name | Description | Type | Default | |---|---|---|---------| -| polygon | Polygon representing the AOI on which to apply the data fusion | GeoJSON | | -| temporal_extent | Date range for which to apply the data fusion | Array | | -| s1_collection | S1 data collection to use for the fusion | Text | RVI | -| s2_collection | S2 data collection to use for fusing the data | Text | NDVI | +| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON |         | +| temporal_extent | Date range for which to apply the data fusion | Array |         | +| s1_collection | S1 data collection to use for the fusion | Text | RVI     | +| s2_collection | S2 data collection to use for fusing the data | Text | NDVI       | ## Supported collections @@ -34,6 +34,16 @@ The `fusets_mogpr_s1s2` service requires the following parameters: The spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km). +## Dependencies + +In addition to various Python libraries, the workflow utilizes the following libraries included in the User-Defined Function (UDF): + +* Biopar: The `biopar` package retrieves biophysical parameters like FAPAR, FCOVER, and more, that were passed as the S2_collection. The biopar package is a Python package that calculates biophysical parameters from Sentinel-2 satellite images as described [here](https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf). 
The `fusets_mogpr` udp directly uses the biopar udp shared in the APEX Algorithms repository. + +* FuseTS: The `fusets` library was developed to facilitate data fusion and time-series analytics using AI/ML to extract insights about land environments. It functions as a Time Series & Data Fusion toolbox integrated with openEO. For additional information, please refer to the [FuseTS documentation](https://open-eo.github.io/FuseTS/installation.html). + + + ## Output This User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes. \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json index 88332e6e..98968d0a 100644 --- a/openeo_udp/fusets_mogpr/fusets_mogpr.json +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -724,7 +724,7 @@ "from_parameter": "data" }, "runtime": "Python", - "udf": "import os\nimport sys\nimport zipfile\nimport requests\nimport functools\n\nfrom openeo.udf import inspect\n\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\n\ndef extract_zip(zip_path, extract_to):\n \"\"\"\n Extracts a zip file from zip_path to the specified extract_to directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(extract_to)\n\n\ndef add_directory_to_sys_path(directory):\n \"\"\"\n Adds a directory to the Python sys.path if it's not already present.\n \"\"\"\n if directory not in sys.path:\n sys.path.insert(0, directory)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url,DEPENDENCIES_DIR):\n \"\"\"\n Main function to set up the dependencies by downloading, extracting,\n and adding necessary directories to 
sys.path.\n \"\"\"\n\n inspect(message=\"Create directories\")\n # Ensure base directories exist\n os.makedirs(DEPENDENCIES_DIR, exist_ok=True)\n\n # Download and extract dependencies if not already present\n if not os.listdir(DEPENDENCIES_DIR):\n\n inspect(message=\"Extract dependencies\")\n zip_path = os.path.join(DEPENDENCIES_DIR, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n extract_zip(zip_path, DEPENDENCIES_DIR)\n os.remove(zip_path)\n\n # Add the extracted dependencies directory to sys.path\n add_directory_to_sys_path(DEPENDENCIES_DIR)\n inspect(message=\"Added to the sys path\")\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\", 'venv')\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip\", 'venv_static')\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the 
context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" + "udf": "#%%\n\nimport os\nimport sys\nimport zipfile\nimport requests\nimport tempfile\nimport shutil\nimport functools\n\nfrom openeo.udf import inspect\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\ndef extract_zip_to_temp(zip_path):\n \"\"\"\n Extracts a zip file to a temporary directory.\n \"\"\"\n # Create a temporary directory\n temp_dir = tempfile.mkdtemp()\n\n # Extract the zip file to the temporary directory\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(temp_dir)\n\n return temp_dir\n\ndef move_top_level_folder_to_destination(temp_dir, destination_dir):\n \"\"\"\n Moves the first top-level folder from the temporary directory to the destination directory.\n Throws an error if the folder already exists at the destination.\n \"\"\"\n # Find the top-level folders inside the extracted zip\n for item in os.listdir(temp_dir):\n item_path = os.path.join(temp_dir, item)\n \n if os.path.isdir(item_path):\n # Check if the folder already exists at destination\n dest_path = os.path.join(destination_dir, item)\n\n if os.path.exists(dest_path):\n # Throw an error if the folder already exists\n raise FileExistsError(f\"Error: The folder '{item}' already exists in the destination directory: {dest_path}\")\n\n # Move the folder out 
of temp and into the destination directory\n shutil.move(item_path, dest_path)\n\n\ndef add_to_sys_path(folder_path):\n \"\"\"\n Adds the folder path to sys.path.\n \"\"\"\n if folder_path not in sys.path:\n sys.path.append(folder_path)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url):\n \"\"\"\n Main function to download, unzip, move the top-level folder, and add it to sys.path.\n \"\"\"\n # Create a temporary directory for extracted files\n temp_dir = tempfile.mkdtemp()\n \n # Step 1: Download the zip file\n zip_path = os.path.join(temp_dir, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n\n inspect(message=\"Extract dependencies to temp\")\n # Step 2: Extract the zip file to the temporary directory\n extracted_dir = extract_zip_to_temp(zip_path)\n\n # Step 3: Move the first top-level folder (dynamically) to the destination\n destination_dir = os.getcwd() # Current working directory\n inspect(message=\"Move top-level folder to destination\")\n moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir)\n\n # Step 4: Add the folder to sys.path\n add_to_sys_path(moved_folder)\n inspect(message=\"Added to the sys path\")\n\n # Clean up the temporary zip file\n os.remove(zip_path)\n shutil.rmtree(temp_dir) # Remove the temporary extraction folder \n\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\")\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = 
Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" }, "result": true } @@ -748,7 +748,7 @@ }, "id": "fusets_mogpr", "summary": "Integrate S1 and S2 timeseries using multi-output gaussian process regression", - "description": "# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR)\n\nThis service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. 
This service focusses on the fusion of Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources.\n\n## Parameters\n\nThe `fusets_mogpr_s1s2` service requires the following parameters:\n\n| Name | Description | Type | Default |\n|---|---|---|---------|\n| polygon | Polygon representing the AOI on which to apply the data fusion | GeoJSON | | \n| temporal_extent | Date range for which to apply the data fusion | Array | |\n| s1_collection | S1 data collection to use for the fusion | Text | RVI |\n| s2_collection | S2 data collection to use for fusing the data | Text | NDVI | \n\n## Supported collections\n\n#### Sentinel-1\n\n* RVI\n* GRD\n\n#### Sentinel-2\n\n* NDVI\n* FAPAR\n* LAI\n* FCOVER\n* EVI\n* CCC\n* CWC\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Output\n\nThis User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", + "description": "# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR)\n\nThis service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. 
This service focuses on fusing Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources.\n\n## Parameters\n\nThe `fusets_mogpr_s1s2` service requires the following parameters:\n\n| Name | Description | Type | Default |\n|---|---|---|---------|\n| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON |         | \n| temporal_extent | Date range for which to apply the data fusion | Array |         |\n| s1_collection | S1 data collection to use for the fusion | Text | RVI     |\n| s2_collection | S2 data collection to use for fusing the data | Text | NDVI       | \n\n## Supported collections\n\n#### Sentinel-1\n\n* RVI\n* GRD\n\n#### Sentinel-2\n\n* NDVI\n* FAPAR\n* LAI\n* FCOVER\n* EVI\n* CCC\n* CWC\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Dependencies\n\nIn addition to various Python libraries, the workflow utilizes the following libraries included in the User-Defined Function (UDF):\n\n* Biopar: The `biopar` package retrieves biophysical parameters like FAPAR, FCOVER, and more, that were passed as the S2_collection. The biopar package is a Python package that calculates biophysical parameters from Sentinel-2 satellite images as described [here](https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf). The `fusets_mogpr` udp directly uses the biopar udp shared in the APEX Algorithms repository. \n\n* FuseTS: The `fusets` library was developed to facilitate data fusion and time-series analytics using AI/ML to extract insights about land environments. It functions as a Time Series & Data Fusion toolbox integrated with openEO. 
For additional information, please refer to the [FuseTS documentation](https://open-eo.github.io/FuseTS/installation.html).\n\n\n\n## Output\n\nThis User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", "parameters": [ { "name": "spatial_extent", diff --git a/openeo_udp/fusets_mogpr/set_path.py b/openeo_udp/fusets_mogpr/set_path.py index a7ecba4c..50b965cd 100644 --- a/openeo_udp/fusets_mogpr/set_path.py +++ b/openeo_udp/fusets_mogpr/set_path.py @@ -1,12 +1,15 @@ +#%% + import os import sys import zipfile import requests +import tempfile +import shutil import functools from openeo.udf import inspect - def download_file(url, path): """ Downloads a file from the given URL to the specified path. @@ -15,45 +18,75 @@ def download_file(url, path): with open(path, "wb") as file: file.write(response.content) - -def extract_zip(zip_path, extract_to): +def extract_zip_to_temp(zip_path): """ - Extracts a zip file from zip_path to the specified extract_to directory. + Extracts a zip file to a temporary directory. """ + # Create a temporary directory + temp_dir = tempfile.mkdtemp() + + # Extract the zip file to the temporary directory with zipfile.ZipFile(zip_path, "r") as zip_ref: - zip_ref.extractall(extract_to) + zip_ref.extractall(temp_dir) + return temp_dir -def add_directory_to_sys_path(directory): +def move_top_level_folder_to_destination(temp_dir, destination_dir): """ - Adds a directory to the Python sys.path if it's not already present. + Moves the first top-level folder from the temporary directory to the destination directory. + Throws an error if the folder already exists at the destination. 
""" - if directory not in sys.path: - sys.path.insert(0, directory) + # Find the top-level folders inside the extracted zip + for item in os.listdir(temp_dir): + item_path = os.path.join(temp_dir, item) + + if os.path.isdir(item_path): + # Check if the folder already exists at destination + dest_path = os.path.join(destination_dir, item) + + if os.path.exists(dest_path): + # Throw an error if the folder already exists + raise FileExistsError(f"Error: The folder '{item}' already exists in the destination directory: {dest_path}") + + # Move the folder out of temp and into the destination directory + shutil.move(item_path, dest_path) + + +def add_to_sys_path(folder_path): + """ + Adds the folder path to sys.path. + """ + if folder_path not in sys.path: + sys.path.append(folder_path) @functools.lru_cache(maxsize=5) -def setup_dependencies(dependencies_url,DEPENDENCIES_DIR): +def setup_dependencies(dependencies_url): """ - Main function to set up the dependencies by downloading, extracting, - and adding necessary directories to sys.path. + Main function to download, unzip, move the top-level folder, and add it to sys.path. 
""" + # Create a temporary directory for extracted files + temp_dir = tempfile.mkdtemp() + + # Step 1: Download the zip file + zip_path = os.path.join(temp_dir, "temp.zip") + download_file(dependencies_url, zip_path) + + inspect(message="Extract dependencies to temp") + # Step 2: Extract the zip file to the temporary directory + extracted_dir = extract_zip_to_temp(zip_path) - inspect(message="Create directories") - # Ensure base directories exist - os.makedirs(DEPENDENCIES_DIR, exist_ok=True) + # Step 3: Move the first top-level folder (dynamically) to the destination + destination_dir = os.getcwd() # Current working directory + inspect(message="Move top-level folder to destination") + moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir) - # Download and extract dependencies if not already present - if not os.listdir(DEPENDENCIES_DIR): + # Step 4: Add the folder to sys.path + add_to_sys_path(moved_folder) + inspect(message="Added to the sys path") - inspect(message="Extract dependencies") - zip_path = os.path.join(DEPENDENCIES_DIR, "temp.zip") - download_file(dependencies_url, zip_path) - extract_zip(zip_path, DEPENDENCIES_DIR) - os.remove(zip_path) + # Clean up the temporary zip file + os.remove(zip_path) + shutil.rmtree(temp_dir) # Remove the temporary extraction folder - # Add the extracted dependencies directory to sys.path - add_directory_to_sys_path(DEPENDENCIES_DIR) - inspect(message="Added to the sys path") -setup_dependencies("https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip", 'venv') -setup_dependencies("https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets.zip", 'venv_static') \ No newline at end of file +setup_dependencies("https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip") \ No newline at end of file From e21d20eb2a0e36ec487311584bd8ef5edf14e3c4 Mon Sep 17 00:00:00 2001 From: Pratichhya 
<39898768+Pratichhya@users.noreply.github.com> Date: Wed, 29 Jan 2025 10:52:59 +0100 Subject: [PATCH 20/26] latest changes in dependencies implemented --- algorithm_catalog/fusets_mogpr.json | 6 ++--- benchmark_scenarios/fusets_mogpr.json | 32 +++++++++++++-------------- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/algorithm_catalog/fusets_mogpr.json b/algorithm_catalog/fusets_mogpr.json index 2c5a053b..364f0dc3 100644 --- a/algorithm_catalog/fusets_mogpr.json +++ b/algorithm_catalog/fusets_mogpr.json @@ -6,8 +6,8 @@ ], "geometry": null, "properties": { - "created": "2025-01-093T00:00:00Z", - "updated": "2025-01-09T00:00:00Z", + "created": "2025-01-09T00:00:00Z", + "updated": "2025-01-29T00:00:00Z", "type": "apex_algorithm", "title": "Multi output gaussian process regression", "description": "Integrates timeseries in data cube using multi-output gaussian process regression. The service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. 
It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other indicators that are correlated with each other.", @@ -130,7 +130,7 @@ "rel": "example", "type": "application/json", "title": "Example output", - "href": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.nc" + "href": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/fusets_mogpr.nc" } ] } \ No newline at end of file diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json index b38247ba..79a7db27 100644 --- a/benchmark_scenarios/fusets_mogpr.json +++ b/benchmark_scenarios/fusets_mogpr.json @@ -5,42 +5,40 @@ "description": "Multi output gaussian process regression example on NDVI timeseries", "backend": "openeofed.dataspace.copernicus.eu", "process_graph": { - "mogprs1s21601251": { + "fusetsmogpr": { "arguments": { - "include_raw_inputs": true, - "include_uncertainties": true, "s1_collection": "RVI", "s2_collection": "NDVI", "spatial_extent": { "coordinates": [ [ [ - 5.170012098271149, - 51.25062964728295 + 5.178303838475193, + 51.252856237848164 ], [ - 5.17085904378298, - 51.24882567194015 + 5.178003609252369, + 51.25109194151486 ], [ - 5.17857421368097, - 51.2468515482926 + 5.179280940922463, + 51.25103833409551 ], [ - 5.178972704726344, - 51.24982704376254 + 5.179565949577788, + 51.25278555186941 ], [ - 5.170012098271149, - 51.25062964728295 + 5.178303838475193, + 51.252856237848164 ] ] ], "type": "Polygon" }, "temporal_extent": [ - "2023-01-01", - "2023-12-31" + "2021-01-01", + "2021-12-15" ] }, "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json", @@ -49,7 +47,7 @@ "saveresult1": { "arguments": { "data": { - "from_node": "mogprs1s21601251" + "from_node": "fusetsmogpr" }, "format": "netCDF", "options": {} @@ -59,7 +57,7 @@ } }, "reference_data": { - "timeseries.nc": 
"https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/timeseries.nc" + "timeseries.nc": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/fusets_mogpr/fusets_mogpr.nc" } } ] \ No newline at end of file From b84e037935d2f403c32aa94c6285d189fbdf7676 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Wed, 29 Jan 2025 11:00:39 +0100 Subject: [PATCH 21/26] corrected encoding --- openeo_udp/fusets_mogpr/README.md | 13 +- openeo_udp/fusets_mogpr/fusets_mogpr.json | 1662 ++++++++++----------- 2 files changed, 838 insertions(+), 837 deletions(-) diff --git a/openeo_udp/fusets_mogpr/README.md b/openeo_udp/fusets_mogpr/README.md index 1206e0e0..4ffb7a6d 100644 --- a/openeo_udp/fusets_mogpr/README.md +++ b/openeo_udp/fusets_mogpr/README.md @@ -6,12 +6,13 @@ This service is designed to enable multi-output regression analysis using Gaussi The `fusets_mogpr_s1s2` service requires the following parameters: -| Name | Description | Type | Default | -|---|---|---|---------| -| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON |         | -| temporal_extent | Date range for which to apply the data fusion | Array |         | -| s1_collection | S1 data collection to use for the fusion | Text | RVI     | -| s2_collection | S2 data collection to use for fusing the data | Text | NDVI       | + +| Name | Description | Type | Default | +| --------------- | -------------------------------------------------------------- | ------- | ------- | +| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON | | +| temporal_extent | Date range for which to apply the data fusion | Array | | +| s1_collection | S1 data collection to use for the fusion | Text | RVI | +| s2_collection | S2 data collection to use for fusing the data | Text | NDVI | ## Supported collections diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json 
index 98968d0a..faa2b023 100644 --- a/openeo_udp/fusets_mogpr/fusets_mogpr.json +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -1,900 +1,900 @@ { - "process_graph": { - "biopar1": { - "process_id": "biopar", - "arguments": { - "biopar_type": "CWC", - "date": { - "from_parameter": "temporal_extent" - }, - "polygon": { - "from_parameter": "spatial_extent" - } - }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "biopar2": { - "process_id": "biopar", - "arguments": { - "biopar_type": "CCC", - "date": { - "from_parameter": "temporal_extent" - }, - "polygon": { - "from_parameter": "spatial_extent" - } - }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "loadcollection1": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "B02", - "B04", - "B08" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "loadcollection2": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "SCL" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "toscldilationmask1": { - "process_id": "to_scl_dilation_mask", - "arguments": { - "data": { - "from_node": "loadcollection2" - } - } - }, - "mask1": { - "process_id": "mask", - "arguments": { - "data": { - "from_node": "loadcollection1" - }, - "mask": { - "from_node": "toscldilationmask1" - } - } - }, - "reducedimension1": { - "process_id": "reduce_dimension", - "arguments": { + "process_graph": { + "biopar1": { + "process_id": "biopar", + "arguments": { + "biopar_type": "CWC", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": 
"spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar2": { + "process_id": "biopar", + "arguments": { + "biopar_type": "CCC", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "B02", + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "loadcollection2": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "toscldilationmask1": { + "process_id": "to_scl_dilation_mask", + "arguments": { + "data": { + "from_node": "loadcollection2" + } + } + }, + "mask1": { + "process_id": "mask", + "arguments": { + "data": { + "from_node": "loadcollection1" + }, + "mask": { + "from_node": "toscldilationmask1" + } + } + }, + "reducedimension1": { + "process_id": "reduce_dimension", + "arguments": { + "data": { + "from_node": "mask1" + }, + "dimension": "bands", + "reducer": { + "process_graph": { + "arrayelement1": { + "process_id": "array_element", + "arguments": { "data": { - "from_node": "mask1" + "from_parameter": "data" }, - "dimension": "bands", - "reducer": { - "process_graph": { - "arrayelement1": { - "process_id": "array_element", - "arguments": { - "data": { - "from_parameter": "data" - }, - "index": 2 - } - }, - "arrayelement2": { - "process_id": "array_element", - "arguments": { - "data": { - "from_parameter": "data" - 
}, - "index": 1 - } - }, - "subtract1": { - "process_id": "subtract", - "arguments": { - "x": { - "from_node": "arrayelement1" - }, - "y": { - "from_node": "arrayelement2" - } - } - }, - "multiply1": { - "process_id": "multiply", - "arguments": { - "x": 2.5, - "y": { - "from_node": "subtract1" - } - } - }, - "multiply2": { - "process_id": "multiply", - "arguments": { - "x": 6, - "y": { - "from_node": "arrayelement2" - } - } - }, - "add1": { - "process_id": "add", - "arguments": { - "x": { - "from_node": "arrayelement1" - }, - "y": { - "from_node": "multiply2" - } - } - }, - "arrayelement3": { - "process_id": "array_element", - "arguments": { - "data": { - "from_parameter": "data" - }, - "index": 1 - } - }, - "multiply3": { - "process_id": "multiply", - "arguments": { - "x": 7.5, - "y": { - "from_node": "arrayelement3" - } - } - }, - "subtract2": { - "process_id": "subtract", - "arguments": { - "x": { - "from_node": "add1" - }, - "y": { - "from_node": "multiply3" - } - } - }, - "add2": { - "process_id": "add", - "arguments": { - "x": { - "from_node": "subtract2" - }, - "y": 1 - } - }, - "divide1": { - "process_id": "divide", - "arguments": { - "x": { - "from_node": "multiply1" - }, - "y": { - "from_node": "add2" - } - }, - "result": true - } - } - } - } - }, - "adddimension1": { - "process_id": "add_dimension", - "arguments": { + "index": 2 + } + }, + "arrayelement2": { + "process_id": "array_element", + "arguments": { "data": { - "from_node": "reducedimension1" - }, - "label": "EVI", - "name": "bands", - "type": "bands" - } - }, - "biopar3": { - "process_id": "biopar", - "arguments": { - "biopar_type": "FCOVER", - "date": { - "from_parameter": "temporal_extent" + "from_parameter": "data" }, - "polygon": { - "from_parameter": "spatial_extent" - } + "index": 1 + } }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "biopar4": { - "process_id": "biopar", - 
"arguments": { - "biopar_type": "LAI", - "date": { - "from_parameter": "temporal_extent" + "subtract1": { + "process_id": "subtract", + "arguments": { + "x": { + "from_node": "arrayelement1" }, - "polygon": { - "from_parameter": "spatial_extent" + "y": { + "from_node": "arrayelement2" } + } }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "biopar5": { - "process_id": "biopar", - "arguments": { - "biopar_type": "FAPAR", - "date": { - "from_parameter": "temporal_extent" - }, - "polygon": { - "from_parameter": "spatial_extent" + "multiply1": { + "process_id": "multiply", + "arguments": { + "x": 2.5, + "y": { + "from_node": "subtract1" } + } }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "loadcollection3": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "B04", - "B08" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "loadcollection4": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "SCL" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "toscldilationmask2": { - "process_id": "to_scl_dilation_mask", - "arguments": { - "data": { - "from_node": "loadcollection4" + "multiply2": { + "process_id": "multiply", + "arguments": { + "x": 6.0, + "y": { + "from_node": "arrayelement2" } - } - }, - "mask2": { - "process_id": "mask", - "arguments": { - "data": { - "from_node": "loadcollection3" + } + }, + "add1": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement1" }, - "mask": { - "from_node": "toscldilationmask2" + "y": { + "from_node": "multiply2" } - } - }, - 
"ndvi1": { - "process_id": "ndvi", - "arguments": { + } + }, + "arrayelement3": { + "process_id": "array_element", + "arguments": { "data": { - "from_node": "mask2" + "from_parameter": "data" }, - "nir": "B08", - "red": "B04", - "target_band": "NDVI" - } - }, - "filterbands1": { - "process_id": "filter_bands", - "arguments": { - "bands": [ - "NDVI" - ], - "data": { - "from_node": "ndvi1" + "index": 1 + } + }, + "multiply3": { + "process_id": "multiply", + "arguments": { + "x": 7.5, + "y": { + "from_node": "arrayelement3" } - } - }, - "eq1": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, + } + }, + "subtract2": { + "process_id": "subtract", + "arguments": { "x": { - "from_parameter": "s2_collection" + "from_node": "add1" }, - "y": "ndvi" - } - }, - "if1": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "filterbands1" - }, - "reject": null, - "value": { - "from_node": "eq1" + "y": { + "from_node": "multiply3" } - } - }, - "eq2": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, + } + }, + "add2": { + "process_id": "add", + "arguments": { "x": { - "from_parameter": "s2_collection" - }, - "y": "fapar" - } - }, - "if2": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar5" + "from_node": "subtract2" }, - "reject": { - "from_node": "if1" - }, - "value": { - "from_node": "eq2" - } - } - }, - "eq3": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, + "y": 1.0 + } + }, + "divide1": { + "process_id": "divide", + "arguments": { "x": { - "from_parameter": "s2_collection" - }, - "y": "lai" - } - }, - "if3": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar4" + "from_node": "multiply1" }, - "reject": { - "from_node": "if2" - }, - "value": { - "from_node": "eq3" + "y": { + "from_node": "add2" } + }, + "result": true } + } + } + } + }, + "adddimension1": { + "process_id": "add_dimension", + "arguments": { + "data": { + "from_node": "reducedimension1" 
+ }, + "label": "EVI", + "name": "bands", + "type": "bands" + } + }, + "biopar3": { + "process_id": "biopar", + "arguments": { + "biopar_type": "FCOVER", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar4": { + "process_id": "biopar", + "arguments": { + "biopar_type": "LAI", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar5": { + "process_id": "biopar", + "arguments": { + "biopar_type": "FAPAR", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" + } + }, + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "loadcollection3": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "loadcollection4": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "toscldilationmask2": { + "process_id": "to_scl_dilation_mask", + "arguments": { + "data": { + "from_node": "loadcollection4" + } + } + }, + "mask2": { + "process_id": "mask", + "arguments": { + "data": { + "from_node": "loadcollection3" + }, + "mask": { + "from_node": "toscldilationmask2" + } + } + }, + "ndvi1": { + "process_id": 
"ndvi", + "arguments": { + "data": { + "from_node": "mask2" + }, + "nir": "B08", + "red": "B04", + "target_band": "NDVI" + } + }, + "filterbands1": { + "process_id": "filter_bands", + "arguments": { + "bands": [ + "NDVI" + ], + "data": { + "from_node": "ndvi1" + } + } + }, + "eq1": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "ndvi" + } + }, + "if1": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "filterbands1" + }, + "reject": null, + "value": { + "from_node": "eq1" + } + } + }, + "eq2": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "fapar" + } + }, + "if2": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar5" }, - "eq4": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "fcover" - } - }, - "if4": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar3" - }, - "reject": { - "from_node": "if3" - }, - "value": { - "from_node": "eq4" - } - } + "reject": { + "from_node": "if1" }, - "eq5": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "evi" - } + "value": { + "from_node": "eq2" + } + } + }, + "eq3": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "lai" + } + }, + "if3": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar4" }, - "if5": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "adddimension1" - }, - "reject": { - "from_node": "if4" - }, - "value": { - "from_node": "eq5" - } - } + "reject": { + "from_node": "if2" }, - "eq6": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "ccc" - } + 
"value": { + "from_node": "eq3" + } + } + }, + "eq4": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "fcover" + } + }, + "if4": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar3" }, - "if6": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar2" - }, - "reject": { - "from_node": "if5" - }, - "value": { - "from_node": "eq6" - } - } + "reject": { + "from_node": "if3" }, - "eq7": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "cwc" - } + "value": { + "from_node": "eq4" + } + } + }, + "eq5": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "evi" + } + }, + "if5": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "adddimension1" }, - "if7": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar1" - }, - "reject": { - "from_node": "if6" - }, - "value": { - "from_node": "eq7" - } - } + "reject": { + "from_node": "if4" }, - "loadcollection5": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "VV", - "VH" - ], - "id": "SENTINEL1_GRD", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } + "value": { + "from_node": "eq5" + } + } + }, + "eq6": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "ccc" + } + }, + "if6": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar2" }, - "sarbackscatter1": { - "process_id": "sar_backscatter", - "arguments": { - "coefficient": "sigma0-ellipsoid", - "contributing_area": false, - "data": { - "from_node": "loadcollection5" - }, - "elevation_model": null, - "ellipsoid_incidence_angle": false, - "local_incidence_angle": false, 
- "mask": false, - "noise_removal": true - } + "reject": { + "from_node": "if5" }, - "renamelabels1": { - "process_id": "rename_labels", - "arguments": { - "data": { - "from_node": "sarbackscatter1" - }, - "dimension": "bands", - "target": [ - "VV", - "VH" - ] - } + "value": { + "from_node": "eq6" + } + } + }, + "eq7": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "cwc" + } + }, + "if7": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar1" }, - "reducedimension2": { - "process_id": "reduce_dimension", - "arguments": { - "data": { - "from_node": "renamelabels1" - }, - "dimension": "bands", - "reducer": { - "process_graph": { - "arrayelement4": { - "process_id": "array_element", - "arguments": { - "data": { - "from_parameter": "data" - }, - "index": 1 - } - }, - "add3": { - "process_id": "add", - "arguments": { - "x": { - "from_node": "arrayelement4" - }, - "y": { - "from_node": "arrayelement4" - } - } - }, - "arrayelement5": { - "process_id": "array_element", - "arguments": { - "data": { - "from_parameter": "data" - }, - "index": 0 - } - }, - "add4": { - "process_id": "add", - "arguments": { - "x": { - "from_node": "arrayelement5" - }, - "y": { - "from_node": "arrayelement4" - } - } - }, - "divide2": { - "process_id": "divide", - "arguments": { - "x": { - "from_node": "add3" - }, - "y": { - "from_node": "add4" - } - }, - "result": true - } - } - } - } + "reject": { + "from_node": "if6" }, - "adddimension2": { - "process_id": "add_dimension", - "arguments": { + "value": { + "from_node": "eq7" + } + } + }, + "loadcollection5": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "VV", + "VH" + ], + "id": "SENTINEL1_GRD", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "sarbackscatter1": { + "process_id": "sar_backscatter", + "arguments": { + "coefficient": 
"sigma0-ellipsoid", + "contributing_area": false, + "data": { + "from_node": "loadcollection5" + }, + "elevation_model": null, + "ellipsoid_incidence_angle": false, + "local_incidence_angle": false, + "mask": false, + "noise_removal": true + } + }, + "renamelabels1": { + "process_id": "rename_labels", + "arguments": { + "data": { + "from_node": "sarbackscatter1" + }, + "dimension": "bands", + "target": [ + "VV", + "VH" + ] + } + }, + "reducedimension2": { + "process_id": "reduce_dimension", + "arguments": { + "data": { + "from_node": "renamelabels1" + }, + "dimension": "bands", + "reducer": { + "process_graph": { + "arrayelement4": { + "process_id": "array_element", + "arguments": { "data": { - "from_node": "reducedimension2" + "from_parameter": "data" }, - "label": "RVI", - "name": "bands", - "type": "bands" - } - }, - "loadcollection6": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "VV", - "VH" - ], - "id": "SENTINEL1_GRD", - "spatial_extent": { - "from_parameter": "spatial_extent" + "index": 1 + } + }, + "add3": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement4" }, - "temporal_extent": { - "from_parameter": "temporal_extent" + "y": { + "from_node": "arrayelement4" } - } - }, - "sarbackscatter2": { - "process_id": "sar_backscatter", - "arguments": { - "coefficient": "sigma0-ellipsoid", - "contributing_area": false, - "data": { - "from_node": "loadcollection6" - }, - "elevation_model": null, - "ellipsoid_incidence_angle": false, - "local_incidence_angle": false, - "mask": false, - "noise_removal": true - } - }, - "renamelabels2": { - "process_id": "rename_labels", - "arguments": { + } + }, + "arrayelement5": { + "process_id": "array_element", + "arguments": { "data": { - "from_node": "sarbackscatter2" + "from_parameter": "data" }, - "dimension": "bands", - "target": [ - "VV", - "VH" - ] - } - }, - "eq8": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, + "index": 0 + } + }, + "add4": { + 
"process_id": "add", + "arguments": { "x": { - "from_parameter": "s1_collection" + "from_node": "arrayelement5" }, - "y": "grd" - } - }, - "if8": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "renamelabels2" - }, - "reject": null, - "value": { - "from_node": "eq8" + "y": { + "from_node": "arrayelement4" } - } - }, - "eq9": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, + } + }, + "divide2": { + "process_id": "divide", + "arguments": { "x": { - "from_parameter": "s1_collection" - }, - "y": "rvi" - } - }, - "if9": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "adddimension2" - }, - "reject": { - "from_node": "if8" + "from_node": "add3" }, - "value": { - "from_node": "eq9" + "y": { + "from_node": "add4" } + }, + "result": true } + } + } + } + }, + "adddimension2": { + "process_id": "add_dimension", + "arguments": { + "data": { + "from_node": "reducedimension2" + }, + "label": "RVI", + "name": "bands", + "type": "bands" + } + }, + "loadcollection6": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "VV", + "VH" + ], + "id": "SENTINEL1_GRD", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "sarbackscatter2": { + "process_id": "sar_backscatter", + "arguments": { + "coefficient": "sigma0-ellipsoid", + "contributing_area": false, + "data": { + "from_node": "loadcollection6" + }, + "elevation_model": null, + "ellipsoid_incidence_angle": false, + "local_incidence_angle": false, + "mask": false, + "noise_removal": true + } + }, + "renamelabels2": { + "process_id": "rename_labels", + "arguments": { + "data": { + "from_node": "sarbackscatter2" + }, + "dimension": "bands", + "target": [ + "VV", + "VH" + ] + } + }, + "eq8": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s1_collection" + }, + "y": "grd" + } + }, + "if8": { + "process_id": "if", + 
"arguments": { + "accept": { + "from_node": "renamelabels2" + }, + "reject": null, + "value": { + "from_node": "eq8" + } + } + }, + "eq9": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s1_collection" + }, + "y": "rvi" + } + }, + "if9": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "adddimension2" }, - "mergecubes1": { - "process_id": "merge_cubes", - "arguments": { - "cube1": { - "from_node": "if7" - }, - "cube2": { - "from_node": "if9" - } - } + "reject": { + "from_node": "if8" }, - "applyneighborhood1": { - "process_id": "apply_neighborhood", - "arguments": { - "data": { - "from_node": "mergecubes1" - }, - "overlap": [], - "process": { - "process_graph": { - "runudf1": { - "process_id": "run_udf", - "arguments": { - "context": {}, - "data": { - "from_parameter": "data" - }, - "runtime": "Python", - "udf": "#%%\n\nimport os\nimport sys\nimport zipfile\nimport requests\nimport tempfile\nimport shutil\nimport functools\n\nfrom openeo.udf import inspect\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\ndef extract_zip_to_temp(zip_path):\n \"\"\"\n Extracts a zip file to a temporary directory.\n \"\"\"\n # Create a temporary directory\n temp_dir = tempfile.mkdtemp()\n\n # Extract the zip file to the temporary directory\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(temp_dir)\n\n return temp_dir\n\ndef move_top_level_folder_to_destination(temp_dir, destination_dir):\n \"\"\"\n Moves the first top-level folder from the temporary directory to the destination directory.\n Throws an error if the folder already exists at the destination.\n \"\"\"\n # Find the top-level folders inside the extracted zip\n for item in os.listdir(temp_dir):\n item_path = os.path.join(temp_dir, item)\n \n if 
os.path.isdir(item_path):\n # Check if the folder already exists at destination\n dest_path = os.path.join(destination_dir, item)\n\n if os.path.exists(dest_path):\n # Throw an error if the folder already exists\n raise FileExistsError(f\"Error: The folder '{item}' already exists in the destination directory: {dest_path}\")\n\n # Move the folder out of temp and into the destination directory\n shutil.move(item_path, dest_path)\n\n\ndef add_to_sys_path(folder_path):\n \"\"\"\n Adds the folder path to sys.path.\n \"\"\"\n if folder_path not in sys.path:\n sys.path.append(folder_path)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url):\n \"\"\"\n Main function to download, unzip, move the top-level folder, and add it to sys.path.\n \"\"\"\n # Create a temporary directory for extracted files\n temp_dir = tempfile.mkdtemp()\n \n # Step 1: Download the zip file\n zip_path = os.path.join(temp_dir, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n\n inspect(message=\"Extract dependencies to temp\")\n # Step 2: Extract the zip file to the temporary directory\n extracted_dir = extract_zip_to_temp(zip_path)\n\n # Step 3: Move the first top-level folder (dynamically) to the destination\n destination_dir = os.getcwd() # Current working directory\n inspect(message=\"Move top-level folder to destination\")\n moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir)\n\n # Step 4: Add the folder to sys.path\n add_to_sys_path(moved_folder)\n inspect(message=\"Added to the sys path\")\n\n # Clean up the temporary zip file\n os.remove(zip_path)\n shutil.rmtree(temp_dir) # Remove the temporary extraction folder \n\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\")\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual 
environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" - }, - "result": true - } - } - }, - "size": [ - { - "dimension": "x", - "value": 32, - "unit": "px" - }, - { - "dimension": "y", - "value": 32, - "unit": "px" - } - ] - }, - "result": true + "value": { + "from_node": "eq9" + } + } + }, + "mergecubes1": { + "process_id": "merge_cubes", + "arguments": { + "cube1": { + "from_node": "if7" + }, + "cube2": { + "from_node": "if9" } + } }, - "id": "fusets_mogpr", - "summary": "Integrate S1 and S2 timeseries using multi-output gaussian process regression", - 
"description": "# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR)\n\nThis service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. This service focuses on fusing Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources.\n\n## Parameters\n\nThe `fusets_mogpr_s1s2` service requires the following parameters:\n\n| Name | Description | Type | Default |\n|---|---|---|---------|\n| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON |         | \n| temporal_extent | Date range for which to apply the data fusion | Array |         |\n| s1_collection | S1 data collection to use for the fusion | Text | RVI     |\n| s2_collection | S2 data collection to use for fusing the data | Text | NDVI       | \n\n## Supported collections\n\n#### Sentinel-1\n\n* RVI\n* GRD\n\n#### Sentinel-2\n\n* NDVI\n* FAPAR\n* LAI\n* FCOVER\n* EVI\n* CCC\n* CWC\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Dependencies\n\nIn addition to various Python libraries, the workflow utilizes the following libraries included in the User-Defined Function (UDF):\n\n* Biopar: The `biopar` package retrieves biophysical parameters like FAPAR, FCOVER, and more, that were passed as the S2_collection. The biopar package is a Python package that calculates biophysical parameters from Sentinel-2 satellite images as described [here](https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf). The `fusets_mogpr` udp directly uses the biopar udp shared in the APEX Algorithms repository. 
\n\n* FuseTS: The `fusets` library was developed to facilitate data fusion and time-series analytics using AI/ML to extract insights about land environments. It functions as a Time Series & Data Fusion toolbox integrated with openEO. For additional information, please refer to the [FuseTS documentation](https://open-eo.github.io/FuseTS/installation.html).\n\n\n\n## Output\n\nThis User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", - "parameters": [ + "applyneighborhood1": { + "process_id": "apply_neighborhood", + "arguments": { + "data": { + "from_node": "mergecubes1" + }, + "overlap": [], + "process": { + "process_graph": { + "runudf1": { + "process_id": "run_udf", + "arguments": { + "context": {}, + "data": { + "from_parameter": "data" + }, + "runtime": "Python", + "udf": "#%%\n\nimport os\nimport sys\nimport zipfile\nimport requests\nimport tempfile\nimport shutil\nimport functools\n\nfrom openeo.udf import inspect\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\ndef extract_zip_to_temp(zip_path):\n \"\"\"\n Extracts a zip file to a temporary directory.\n \"\"\"\n # Create a temporary directory\n temp_dir = tempfile.mkdtemp()\n\n # Extract the zip file to the temporary directory\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(temp_dir)\n\n return temp_dir\n\ndef move_top_level_folder_to_destination(temp_dir, destination_dir):\n \"\"\"\n Moves the first top-level folder from the temporary directory to the destination directory.\n Throws an error if the folder already exists at the destination.\n \"\"\"\n # Find the top-level folders inside the extracted zip\n for item in os.listdir(temp_dir):\n 
item_path = os.path.join(temp_dir, item)\n \n if os.path.isdir(item_path):\n # Check if the folder already exists at destination\n dest_path = os.path.join(destination_dir, item)\n\n if os.path.exists(dest_path):\n # Throw an error if the folder already exists\n raise FileExistsError(f\"Error: The folder '{item}' already exists in the destination directory: {dest_path}\")\n\n # Move the folder out of temp and into the destination directory\n shutil.move(item_path, dest_path)\n\n\ndef add_to_sys_path(folder_path):\n \"\"\"\n Adds the folder path to sys.path.\n \"\"\"\n if folder_path not in sys.path:\n sys.path.append(folder_path)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url):\n \"\"\"\n Main function to download, unzip, move the top-level folder, and add it to sys.path.\n \"\"\"\n # Create a temporary directory for extracted files\n temp_dir = tempfile.mkdtemp()\n \n # Step 1: Download the zip file\n zip_path = os.path.join(temp_dir, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n\n inspect(message=\"Extract dependencies to temp\")\n # Step 2: Extract the zip file to the temporary directory\n extracted_dir = extract_zip_to_temp(zip_path)\n\n # Step 3: Move the first top-level folder (dynamically) to the destination\n destination_dir = os.getcwd() # Current working directory\n inspect(message=\"Move top-level folder to destination\")\n moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir)\n\n # Step 4: Add the folder to sys.path\n add_to_sys_path(moved_folder)\n inspect(message=\"Added to the sys path\")\n\n # Clean up the temporary zip file\n os.remove(zip_path)\n shutil.rmtree(temp_dir) # Remove the temporary extraction folder \n\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\")\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import 
XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" + }, + "result": true + } + } + }, + "size": [ + { + "dimension": "x", + "value": 32, + "unit": "px" + }, + { + "dimension": "y", + "value": 32, + "unit": "px" + } + ] + }, + "result": true + } + }, + "id": "fusets_mogpr", + "summary": "Integrate S1 and S2 timeseries using multi-output gaussian process regression", + "description": "# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR)\n\nThis service 
is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. This service focuses on fusing Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources.\n\n## Parameters\n\nThe `fusets_mogpr_s1s2` service requires the following parameters:\n\n\n| Name | Description | Type | Default |\n| --------------- | -------------------------------------------------------------- | ------- | ------- |\n| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON | |\n| temporal_extent | Date range for which to apply the data fusion | Array | |\n| s1_collection | S1 data collection to use for the fusion | Text | RVI |\n| s2_collection | S2 data collection to use for fusing the data | Text | NDVI |\n\n## Supported collections\n\n#### Sentinel-1\n\n* RVI\n* GRD\n\n#### Sentinel-2\n\n* NDVI\n* FAPAR\n* LAI\n* FCOVER\n* EVI\n* CCC\n* CWC\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Dependencies\n\nIn addition to various Python libraries, the workflow utilizes the following libraries included in the User-Defined Function (UDF):\n\n* Biopar: The `biopar` package retrieves biophysical parameters like FAPAR, FCOVER, and more, that were passed as the S2_collection. The biopar package is a Python package that calculates biophysical parameters from Sentinel-2 satellite images as described [here](https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf). The `fusets_mogpr` udp directly uses the biopar udp shared in the APEX Algorithms repository. \n\n* FuseTS: The `fusets` library was developed to facilitate data fusion and time-series analytics using AI/ML to extract insights about land environments. 
It functions as a Time Series & Data Fusion toolbox integrated with openEO. For additional information, please refer to the [FuseTS documentation](https://open-eo.github.io/FuseTS/installation.html).\n\n\n\n## Output\n\nThis User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", + "parameters": [ + { + "name": "spatial_extent", + "description": "Limits the data to process to the specified bounding box or polygons.\\n\\nFor raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\\nFor vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.\\n\\nEmpty geometries are ignored.\\nSet this parameter to null to set no limit for the spatial extent.", + "schema": [ { - "name": "spatial_extent", - "description": "Limits the data to process to the specified bounding box or polygons.\\n\\nFor raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\\nFor vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). 
Empty geometries may only be in the data cube if no spatial extent has been provided.\\n\\nEmpty geometries are ignored.\\nSet this parameter to null to set no limit for the spatial extent.", - "schema": [ - { - "title": "Bounding Box", - "type": "object", - "subtype": "bounding-box", - "required": [ - "west", - "south", - "east", - "north" - ], - "properties": { - "west": { - "description": "West (lower left corner, coordinate axis 1).", - "type": "number" - }, - "south": { - "description": "South (lower left corner, coordinate axis 2).", - "type": "number" - }, - "east": { - "description": "East (upper right corner, coordinate axis 1).", - "type": "number" - }, - "north": { - "description": "North (upper right corner, coordinate axis 2).", - "type": "number" - }, - "base": { - "description": "Base (optional, lower left corner, coordinate axis 3).", - "type": [ - "number", - "null" - ], - "default": null - }, - "height": { - "description": "Height (optional, upper right corner, coordinate axis 3).", - "type": [ - "number", - "null" - ], - "default": null - }, - "crs": { - "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). 
Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", - "anyOf": [ - { - "title": "EPSG Code", - "type": "integer", - "subtype": "epsg-code", - "minimum": 1000, - "examples": [ - 3857 - ] - }, - { - "title": "WKT2", - "type": "string", - "subtype": "wkt2-definition" - } - ], - "default": 4326 - } - } - }, + "title": "Bounding Box", + "type": "object", + "subtype": "bounding-box", + "required": [ + "west", + "south", + "east", + "north" + ], + "properties": { + "west": { + "description": "West (lower left corner, coordinate axis 1).", + "type": "number" + }, + "south": { + "description": "South (lower left corner, coordinate axis 2).", + "type": "number" + }, + "east": { + "description": "East (upper right corner, coordinate axis 1).", + "type": "number" + }, + "north": { + "description": "North (upper right corner, coordinate axis 2).", + "type": "number" + }, + "base": { + "description": "Base (optional, lower left corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + }, + "height": { + "description": "Height (optional, upper right corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + }, + "crs": { + "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", + "anyOf": [ { - "title": "Vector data cube", - "description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). 
Empty geometries are ignored.", - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" - } - ] + "title": "EPSG Code", + "type": "integer", + "subtype": "epsg-code", + "minimum": 1000, + "examples": [ + 3857 + ] }, { - "title": "No filter", - "description": "Don't filter spatially. All data is included in the data cube.", - "type": "null" + "title": "WKT2", + "type": "string", + "subtype": "wkt2-definition" } - ] + ], + "default": 4326 + } + } }, { - "name": "temporal_extent", - "description": "Temporal extent specified as two-element array with start and end date/date-time. \nThis is date range for which to apply the data fusion", - "schema": { - "type": "array", - "subtype": "temporal-interval", - "uniqueItems": true, - "minItems": 2, - "maxItems": 2, - "items": { - "anyOf": [ - { - "type": "string", - "subtype": "date-time", - "format": "date-time" - }, - { - "type": "string", - "subtype": "date", - "format": "date" - }, - { - "type": "null" - } - ] - } + "title": "Vector data cube", + "description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). Empty geometries are ignored.", + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" } + ] }, { - "name": "s1_collection", - "description": "S1 data collection to use for fusing the data", - "schema": { - "type": "string", - "enum": [ - "RVI", - "GRD" - ] + "title": "No filter", + "description": "Don't filter spatially. All data is included in the data cube.", + "type": "null" + } + ] + }, + { + "name": "temporal_extent", + "description": "Temporal extent specified as two-element array with start and end date/date-time. 
\nThis is date range for which to apply the data fusion", + "schema": { + "type": "array", + "subtype": "temporal-interval", + "uniqueItems": true, + "minItems": 2, + "maxItems": 2, + "items": { + "anyOf": [ + { + "type": "string", + "subtype": "date-time", + "format": "date-time" }, - "default": "RVI", - "optional": true - }, - { - "name": "s2_collection", - "description": "S2 data collection to use for fusing the data", - "schema": { - "type": "string", - "enum": [ - "NDVI", - "FAPAR", - "LAI", - "FCOVER", - "EVI", - "CCC", - "CWC" - ] + { + "type": "string", + "subtype": "date", + "format": "date" }, - "default": "NDVI", - "optional": true + { + "type": "null" + } + ] } - ] + } + }, + { + "name": "s1_collection", + "description": "S1 data collection to use for fusing the data", + "schema": { + "type": "string", + "enum": [ + "RVI", + "GRD" + ] + }, + "default": "RVI", + "optional": true + }, + { + "name": "s2_collection", + "description": "S2 data collection to use for fusing the data", + "schema": { + "type": "string", + "enum": [ + "NDVI", + "FAPAR", + "LAI", + "FCOVER", + "EVI", + "CCC", + "CWC" + ] + }, + "default": "NDVI", + "optional": true + } + ] } \ No newline at end of file From 5c95e0401437f4e947beb9988f6df61b710b9c85 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Wed, 29 Jan 2025 11:03:48 +0100 Subject: [PATCH 22/26] temporary link --- algorithm_catalog/fusets_mogpr.json | 2 +- benchmark_scenarios/fusets_mogpr.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/algorithm_catalog/fusets_mogpr.json b/algorithm_catalog/fusets_mogpr.json index 364f0dc3..a4a44598 100644 --- a/algorithm_catalog/fusets_mogpr.json +++ b/algorithm_catalog/fusets_mogpr.json @@ -114,7 +114,7 @@ "rel": "openeo-process", "type": "application/json", "title": "openEO Process Definition", - "href": 
"https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json" + "href": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/09413be3c27e0e695d426c9ffe5a0fe90beefe65/openeo_udp/fusets_mogpr/fusets_mogpr.json" }, { "rel": "service", diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json index 79a7db27..cb415483 100644 --- a/benchmark_scenarios/fusets_mogpr.json +++ b/benchmark_scenarios/fusets_mogpr.json @@ -41,7 +41,7 @@ "2021-12-15" ] }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/refs/heads/mogpr_v1/openeo_udp/fusets_mogpr/fusets_mogpr.json", + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/09413be3c27e0e695d426c9ffe5a0fe90beefe65/openeo_udp/fusets_mogpr/fusets_mogpr.json", "process_id": "fusets_mogpr" }, "saveresult1": { From a3aef89f6baed25ed15fd046762dfc9d219512e9 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Tue, 4 Feb 2025 09:11:56 +0100 Subject: [PATCH 23/26] minor changes with text --- openeo_udp/fusets_mogpr/fusets_mogpr.json | 2 +- openeo_udp/fusets_mogpr/helpers.py | 4 ++-- openeo_udp/fusets_mogpr/requirements.txt | 1 + openeo_udp/fusets_mogpr/set_path.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 openeo_udp/fusets_mogpr/requirements.txt diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json index faa2b023..df399eb6 100644 --- a/openeo_udp/fusets_mogpr/fusets_mogpr.json +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -724,7 +724,7 @@ "from_parameter": "data" }, "runtime": "Python", - "udf": "#%%\n\nimport os\nimport sys\nimport zipfile\nimport requests\nimport tempfile\nimport shutil\nimport functools\n\nfrom openeo.udf import inspect\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = 
requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\ndef extract_zip_to_temp(zip_path):\n \"\"\"\n Extracts a zip file to a temporary directory.\n \"\"\"\n # Create a temporary directory\n temp_dir = tempfile.mkdtemp()\n\n # Extract the zip file to the temporary directory\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(temp_dir)\n\n return temp_dir\n\ndef move_top_level_folder_to_destination(temp_dir, destination_dir):\n \"\"\"\n Moves the first top-level folder from the temporary directory to the destination directory.\n Throws an error if the folder already exists at the destination.\n \"\"\"\n # Find the top-level folders inside the extracted zip\n for item in os.listdir(temp_dir):\n item_path = os.path.join(temp_dir, item)\n \n if os.path.isdir(item_path):\n # Check if the folder already exists at destination\n dest_path = os.path.join(destination_dir, item)\n\n if os.path.exists(dest_path):\n # Throw an error if the folder already exists\n raise FileExistsError(f\"Error: The folder '{item}' already exists in the destination directory: {dest_path}\")\n\n # Move the folder out of temp and into the destination directory\n shutil.move(item_path, dest_path)\n\n\ndef add_to_sys_path(folder_path):\n \"\"\"\n Adds the folder path to sys.path.\n \"\"\"\n if folder_path not in sys.path:\n sys.path.append(folder_path)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url):\n \"\"\"\n Main function to download, unzip, move the top-level folder, and add it to sys.path.\n \"\"\"\n # Create a temporary directory for extracted files\n temp_dir = tempfile.mkdtemp()\n \n # Step 1: Download the zip file\n zip_path = os.path.join(temp_dir, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n\n inspect(message=\"Extract dependencies to temp\")\n # Step 2: Extract the zip file to the temporary directory\n extracted_dir = extract_zip_to_temp(zip_path)\n\n # Step 3: Move the first 
top-level folder (dynamically) to the destination\n destination_dir = os.getcwd() # Current working directory\n inspect(message=\"Move top-level folder to destination\")\n moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir)\n\n # Step 4: Add the folder to sys.path\n add_to_sys_path(moved_folder)\n inspect(message=\"Added to the sys path\")\n\n # Clean up the temporary zip file\n os.remove(zip_path)\n shutil.rmtree(temp_dir) # Remove the temporary extraction folder \n\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\")\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = 
mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" + "udf": "#%%\n\nimport os\nimport sys\nimport zipfile\nimport requests\nimport tempfile\nimport shutil\nimport functools\n\nfrom openeo.udf import inspect\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\ndef extract_zip_to_temp(zip_path):\n \"\"\"\n Extracts a zip file to a temporary directory.\n \"\"\"\n # Create a temporary directory\n temp_dir = tempfile.mkdtemp()\n\n # Extract the zip file to the temporary directory\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(temp_dir)\n\n return temp_dir\n\ndef move_top_level_folder_to_destination(temp_dir, destination_dir):\n \"\"\"\n Moves each top-level folder from the temporary directory to the destination directory.\n Throws an error if the folder already exists at the destination.\n \"\"\"\n # Find the top-level folders inside the extracted zip\n for item in os.listdir(temp_dir):\n item_path = os.path.join(temp_dir, item)\n \n if os.path.isdir(item_path):\n # Check if the folder already exists at destination\n dest_path = os.path.join(destination_dir, item)\n\n if os.path.exists(dest_path):\n # Throw an error if the folder already exists\n raise FileExistsError(f\"Error: The folder '{item}' already exists in the destination directory: {dest_path}\")\n\n # Move the folder out of temp and into the destination directory\n shutil.move(item_path, dest_path)\n\n\ndef add_to_sys_path(folder_path):\n \"\"\"\n Adds the folder path to sys.path.\n \"\"\"\n if folder_path not in sys.path:\n 
sys.path.append(folder_path)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url):\n \"\"\"\n Main function to download, unzip, move the top-level folder, and add it to sys.path.\n \"\"\"\n # Create a temporary directory for extracted files\n temp_dir = tempfile.mkdtemp()\n \n # Step 1: Download the zip file\n zip_path = os.path.join(temp_dir, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n\n inspect(message=\"Extract dependencies to temp\")\n # Step 2: Extract the zip file to the temporary directory\n extracted_dir = extract_zip_to_temp(zip_path)\n\n # Step 3: Move the first top-level folder (dynamically) to the destination\n destination_dir = os.getcwd() # Current working directory\n inspect(message=\"Move top-level folder to destination\")\n moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir)\n\n # Step 4: Add the folder to sys.path\n add_to_sys_path(moved_folder)\n inspect(message=\"Added to the sys path\")\n\n # Clean up the temporary zip file\n os.remove(zip_path)\n shutil.rmtree(temp_dir) # Remove the temporary extraction folder \n\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\")\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = 
create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" }, "result": true } diff --git a/openeo_udp/fusets_mogpr/helpers.py b/openeo_udp/fusets_mogpr/helpers.py index 0ac4875e..f5335e1a 100644 --- a/openeo_udp/fusets_mogpr/helpers.py +++ b/openeo_udp/fusets_mogpr/helpers.py @@ -134,7 +134,7 @@ def _build_collection_graph(collection, label, callable, reject): def load_s1_collection(connection, collection, polygon, date): """ - Create a S1 input data cube based on the collection selected by the user. This achieved by building an + Create an S1 input data cube based on the collection selected by the user. This achieved by building an if-else structure through the different openEO processes, making sure that the correct datacube is selected when executing the UDP. @@ -164,7 +164,7 @@ def load_s1_collection(connection, collection, polygon, date): def load_s2_collection(connection, collection, polygon, date): """ - Create a S2 input data cube based on the collection selected by the user. This achieved by building an + Create an S2 input data cube based on the collection selected by the user. 
This achieved by building an if-else structure through the different openEO processes, making sure that the correct datacube is selected when executing the UDP. diff --git a/openeo_udp/fusets_mogpr/requirements.txt b/openeo_udp/fusets_mogpr/requirements.txt new file mode 100644 index 00000000..fddacbd7 --- /dev/null +++ b/openeo_udp/fusets_mogpr/requirements.txt @@ -0,0 +1 @@ +fusets>=2.0.1 \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/set_path.py b/openeo_udp/fusets_mogpr/set_path.py index 50b965cd..b16ae218 100644 --- a/openeo_udp/fusets_mogpr/set_path.py +++ b/openeo_udp/fusets_mogpr/set_path.py @@ -33,7 +33,7 @@ def extract_zip_to_temp(zip_path): def move_top_level_folder_to_destination(temp_dir, destination_dir): """ - Moves the first top-level folder from the temporary directory to the destination directory. + Moves each top-level folder from the temporary directory to the destination directory. Throws an error if the folder already exists at the destination. """ # Find the top-level folders inside the extracted zip From aa7b43dc862f0f0c5ee54a0c9709c3272facf1aa Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Tue, 4 Feb 2025 09:12:29 +0100 Subject: [PATCH 24/26] removed fusets from requirements --- qa/unittests/requirements.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/qa/unittests/requirements.txt b/qa/unittests/requirements.txt index b1c0ca1f..01be1428 100644 --- a/qa/unittests/requirements.txt +++ b/qa/unittests/requirements.txt @@ -3,4 +3,3 @@ git+https://github.com/ESA-APEx/esa-apex-toolbox-python.git@main pytest>=8.2.0 moto[s3, server]>=5.0.13 dirty-equals>=0.8.0 -fusets>=2.0.1 From 43ed5183273cea175d389a8ab7aa11b8e6602124 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Wed, 5 Feb 2025 09:20:17 +0100 Subject: [PATCH 25/26] final changes with the temp_dir --- openeo_udp/fusets_mogpr/fusets_mogpr.json | 1662 ++++++++++----------- 
openeo_udp/fusets_mogpr/set_path.py | 52 +- 2 files changed, 852 insertions(+), 862 deletions(-) diff --git a/openeo_udp/fusets_mogpr/fusets_mogpr.json b/openeo_udp/fusets_mogpr/fusets_mogpr.json index df399eb6..3e403037 100644 --- a/openeo_udp/fusets_mogpr/fusets_mogpr.json +++ b/openeo_udp/fusets_mogpr/fusets_mogpr.json @@ -1,900 +1,900 @@ { - "process_graph": { - "biopar1": { - "process_id": "biopar", - "arguments": { - "biopar_type": "CWC", - "date": { - "from_parameter": "temporal_extent" - }, - "polygon": { - "from_parameter": "spatial_extent" - } - }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "biopar2": { - "process_id": "biopar", - "arguments": { - "biopar_type": "CCC", - "date": { - "from_parameter": "temporal_extent" - }, - "polygon": { - "from_parameter": "spatial_extent" - } - }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "loadcollection1": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "B02", - "B04", - "B08" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "loadcollection2": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "SCL" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "toscldilationmask1": { - "process_id": "to_scl_dilation_mask", - "arguments": { - "data": { - "from_node": "loadcollection2" - } - } - }, - "mask1": { - "process_id": "mask", - "arguments": { - "data": { - "from_node": "loadcollection1" - }, - "mask": { - "from_node": "toscldilationmask1" - } - } - }, - "reducedimension1": { - "process_id": "reduce_dimension", - "arguments": { - 
"data": { - "from_node": "mask1" - }, - "dimension": "bands", - "reducer": { - "process_graph": { - "arrayelement1": { - "process_id": "array_element", - "arguments": { - "data": { - "from_parameter": "data" + "process_graph": { + "biopar1": { + "process_id": "biopar", + "arguments": { + "biopar_type": "CWC", + "date": { + "from_parameter": "temporal_extent" }, - "index": 2 - } + "polygon": { + "from_parameter": "spatial_extent" + } }, - "arrayelement2": { - "process_id": "array_element", - "arguments": { - "data": { - "from_parameter": "data" + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar2": { + "process_id": "biopar", + "arguments": { + "biopar_type": "CCC", + "date": { + "from_parameter": "temporal_extent" }, - "index": 1 - } + "polygon": { + "from_parameter": "spatial_extent" + } }, - "subtract1": { - "process_id": "subtract", - "arguments": { - "x": { - "from_node": "arrayelement1" + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "B02", + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" }, - "y": { - "from_node": "arrayelement2" + "temporal_extent": { + "from_parameter": "temporal_extent" } - } - }, - "multiply1": { - "process_id": "multiply", - "arguments": { - "x": 2.5, - "y": { - "from_node": "subtract1" + } + }, + "loadcollection2": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "toscldilationmask1": { + "process_id": "to_scl_dilation_mask", + "arguments": { + "data": { + "from_node": "loadcollection2" + } 
+ } + }, + "mask1": { + "process_id": "mask", + "arguments": { + "data": { + "from_node": "loadcollection1" + }, + "mask": { + "from_node": "toscldilationmask1" + } + } + }, + "reducedimension1": { + "process_id": "reduce_dimension", + "arguments": { + "data": { + "from_node": "mask1" + }, + "dimension": "bands", + "reducer": { + "process_graph": { + "arrayelement1": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 2 + } + }, + "arrayelement2": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 1 + } + }, + "subtract1": { + "process_id": "subtract", + "arguments": { + "x": { + "from_node": "arrayelement1" + }, + "y": { + "from_node": "arrayelement2" + } + } + }, + "multiply1": { + "process_id": "multiply", + "arguments": { + "x": 2.5, + "y": { + "from_node": "subtract1" + } + } + }, + "multiply2": { + "process_id": "multiply", + "arguments": { + "x": 6, + "y": { + "from_node": "arrayelement2" + } + } + }, + "add1": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement1" + }, + "y": { + "from_node": "multiply2" + } + } + }, + "arrayelement3": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 1 + } + }, + "multiply3": { + "process_id": "multiply", + "arguments": { + "x": 7.5, + "y": { + "from_node": "arrayelement3" + } + } + }, + "subtract2": { + "process_id": "subtract", + "arguments": { + "x": { + "from_node": "add1" + }, + "y": { + "from_node": "multiply3" + } + } + }, + "add2": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "subtract2" + }, + "y": 1 + } + }, + "divide1": { + "process_id": "divide", + "arguments": { + "x": { + "from_node": "multiply1" + }, + "y": { + "from_node": "add2" + } + }, + "result": true + } + } + } + } + }, + "adddimension1": { + "process_id": "add_dimension", + "arguments": { + "data": { + "from_node": "reducedimension1" + }, 
+ "label": "EVI", + "name": "bands", + "type": "bands" + } + }, + "biopar3": { + "process_id": "biopar", + "arguments": { + "biopar_type": "FCOVER", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" } - } }, - "multiply2": { - "process_id": "multiply", - "arguments": { - "x": 6.0, - "y": { - "from_node": "arrayelement2" + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar4": { + "process_id": "biopar", + "arguments": { + "biopar_type": "LAI", + "date": { + "from_parameter": "temporal_extent" + }, + "polygon": { + "from_parameter": "spatial_extent" } - } }, - "add1": { - "process_id": "add", - "arguments": { - "x": { - "from_node": "arrayelement1" + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "biopar5": { + "process_id": "biopar", + "arguments": { + "biopar_type": "FAPAR", + "date": { + "from_parameter": "temporal_extent" }, - "y": { - "from_node": "multiply2" + "polygon": { + "from_parameter": "spatial_extent" } - } }, - "arrayelement3": { - "process_id": "array_element", - "arguments": { + "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" + }, + "loadcollection3": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "B04", + "B08" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "loadcollection4": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "SCL" + ], + "id": "SENTINEL2_L2A", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } + }, + "toscldilationmask2": { + 
"process_id": "to_scl_dilation_mask", + "arguments": { "data": { - "from_parameter": "data" + "from_node": "loadcollection4" + } + } + }, + "mask2": { + "process_id": "mask", + "arguments": { + "data": { + "from_node": "loadcollection3" }, - "index": 1 - } - }, - "multiply3": { - "process_id": "multiply", - "arguments": { - "x": 7.5, - "y": { - "from_node": "arrayelement3" + "mask": { + "from_node": "toscldilationmask2" } - } - }, - "subtract2": { - "process_id": "subtract", - "arguments": { + } + }, + "ndvi1": { + "process_id": "ndvi", + "arguments": { + "data": { + "from_node": "mask2" + }, + "nir": "B08", + "red": "B04", + "target_band": "NDVI" + } + }, + "filterbands1": { + "process_id": "filter_bands", + "arguments": { + "bands": [ + "NDVI" + ], + "data": { + "from_node": "ndvi1" + } + } + }, + "eq1": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, "x": { - "from_node": "add1" + "from_parameter": "s2_collection" }, - "y": { - "from_node": "multiply3" + "y": "ndvi" + } + }, + "if1": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "filterbands1" + }, + "reject": null, + "value": { + "from_node": "eq1" } - } - }, - "add2": { - "process_id": "add", - "arguments": { + } + }, + "eq2": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, "x": { - "from_node": "subtract2" + "from_parameter": "s2_collection" }, - "y": 1.0 - } - }, - "divide1": { - "process_id": "divide", - "arguments": { + "y": "fapar" + } + }, + "if2": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar5" + }, + "reject": { + "from_node": "if1" + }, + "value": { + "from_node": "eq2" + } + } + }, + "eq3": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, "x": { - "from_node": "multiply1" + "from_parameter": "s2_collection" + }, + "y": "lai" + } + }, + "if3": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar4" }, - "y": { - "from_node": "add2" + "reject": { + "from_node": 
"if2" + }, + "value": { + "from_node": "eq3" } - }, - "result": true } - } - } - } - }, - "adddimension1": { - "process_id": "add_dimension", - "arguments": { - "data": { - "from_node": "reducedimension1" - }, - "label": "EVI", - "name": "bands", - "type": "bands" - } - }, - "biopar3": { - "process_id": "biopar", - "arguments": { - "biopar_type": "FCOVER", - "date": { - "from_parameter": "temporal_extent" - }, - "polygon": { - "from_parameter": "spatial_extent" - } - }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "biopar4": { - "process_id": "biopar", - "arguments": { - "biopar_type": "LAI", - "date": { - "from_parameter": "temporal_extent" - }, - "polygon": { - "from_parameter": "spatial_extent" - } - }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "biopar5": { - "process_id": "biopar", - "arguments": { - "biopar_type": "FAPAR", - "date": { - "from_parameter": "temporal_extent" - }, - "polygon": { - "from_parameter": "spatial_extent" - } - }, - "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/3b5a011a90f4a3050ff8fdf69ca5bc2fd1535881/openeo_udp/biopar/biopar.json" - }, - "loadcollection3": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "B04", - "B08" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "loadcollection4": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "SCL" - ], - "id": "SENTINEL2_L2A", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "toscldilationmask2": { - "process_id": "to_scl_dilation_mask", - "arguments": { - "data": { - "from_node": "loadcollection4" - } - 
} - }, - "mask2": { - "process_id": "mask", - "arguments": { - "data": { - "from_node": "loadcollection3" - }, - "mask": { - "from_node": "toscldilationmask2" - } - } - }, - "ndvi1": { - "process_id": "ndvi", - "arguments": { - "data": { - "from_node": "mask2" - }, - "nir": "B08", - "red": "B04", - "target_band": "NDVI" - } - }, - "filterbands1": { - "process_id": "filter_bands", - "arguments": { - "bands": [ - "NDVI" - ], - "data": { - "from_node": "ndvi1" - } - } - }, - "eq1": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "ndvi" - } - }, - "if1": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "filterbands1" - }, - "reject": null, - "value": { - "from_node": "eq1" - } - } - }, - "eq2": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "fapar" - } - }, - "if2": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar5" }, - "reject": { - "from_node": "if1" + "eq4": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "fcover" + } }, - "value": { - "from_node": "eq2" - } - } - }, - "eq3": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "lai" - } - }, - "if3": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar4" + "if4": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar3" + }, + "reject": { + "from_node": "if3" + }, + "value": { + "from_node": "eq4" + } + } }, - "reject": { - "from_node": "if2" + "eq5": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "evi" + } }, - "value": { - "from_node": "eq3" - } - } - }, - "eq4": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - 
"from_parameter": "s2_collection" - }, - "y": "fcover" - } - }, - "if4": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar3" + "if5": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "adddimension1" + }, + "reject": { + "from_node": "if4" + }, + "value": { + "from_node": "eq5" + } + } }, - "reject": { - "from_node": "if3" + "eq6": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "ccc" + } }, - "value": { - "from_node": "eq4" - } - } - }, - "eq5": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "evi" - } - }, - "if5": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "adddimension1" + "if6": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar2" + }, + "reject": { + "from_node": "if5" + }, + "value": { + "from_node": "eq6" + } + } }, - "reject": { - "from_node": "if4" + "eq7": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, + "x": { + "from_parameter": "s2_collection" + }, + "y": "cwc" + } }, - "value": { - "from_node": "eq5" - } - } - }, - "eq6": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "ccc" - } - }, - "if6": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar2" + "if7": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "biopar1" + }, + "reject": { + "from_node": "if6" + }, + "value": { + "from_node": "eq7" + } + } }, - "reject": { - "from_node": "if5" + "loadcollection5": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "VV", + "VH" + ], + "id": "SENTINEL1_GRD", + "spatial_extent": { + "from_parameter": "spatial_extent" + }, + "temporal_extent": { + "from_parameter": "temporal_extent" + } + } }, - "value": { - "from_node": "eq6" - } - } - }, - "eq7": { - 
"process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s2_collection" - }, - "y": "cwc" - } - }, - "if7": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "biopar1" + "sarbackscatter1": { + "process_id": "sar_backscatter", + "arguments": { + "coefficient": "sigma0-ellipsoid", + "contributing_area": false, + "data": { + "from_node": "loadcollection5" + }, + "elevation_model": null, + "ellipsoid_incidence_angle": false, + "local_incidence_angle": false, + "mask": false, + "noise_removal": true + } }, - "reject": { - "from_node": "if6" + "renamelabels1": { + "process_id": "rename_labels", + "arguments": { + "data": { + "from_node": "sarbackscatter1" + }, + "dimension": "bands", + "target": [ + "VV", + "VH" + ] + } }, - "value": { - "from_node": "eq7" - } - } - }, - "loadcollection5": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "VV", - "VH" - ], - "id": "SENTINEL1_GRD", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "sarbackscatter1": { - "process_id": "sar_backscatter", - "arguments": { - "coefficient": "sigma0-ellipsoid", - "contributing_area": false, - "data": { - "from_node": "loadcollection5" - }, - "elevation_model": null, - "ellipsoid_incidence_angle": false, - "local_incidence_angle": false, - "mask": false, - "noise_removal": true - } - }, - "renamelabels1": { - "process_id": "rename_labels", - "arguments": { - "data": { - "from_node": "sarbackscatter1" - }, - "dimension": "bands", - "target": [ - "VV", - "VH" - ] - } - }, - "reducedimension2": { - "process_id": "reduce_dimension", - "arguments": { - "data": { - "from_node": "renamelabels1" - }, - "dimension": "bands", - "reducer": { - "process_graph": { - "arrayelement4": { - "process_id": "array_element", - "arguments": { + "reducedimension2": { + "process_id": "reduce_dimension", + "arguments": { "data": { - "from_parameter": "data" 
+ "from_node": "renamelabels1" }, - "index": 1 - } - }, - "add3": { - "process_id": "add", - "arguments": { - "x": { - "from_node": "arrayelement4" + "dimension": "bands", + "reducer": { + "process_graph": { + "arrayelement4": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 1 + } + }, + "add3": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement4" + }, + "y": { + "from_node": "arrayelement4" + } + } + }, + "arrayelement5": { + "process_id": "array_element", + "arguments": { + "data": { + "from_parameter": "data" + }, + "index": 0 + } + }, + "add4": { + "process_id": "add", + "arguments": { + "x": { + "from_node": "arrayelement5" + }, + "y": { + "from_node": "arrayelement4" + } + } + }, + "divide2": { + "process_id": "divide", + "arguments": { + "x": { + "from_node": "add3" + }, + "y": { + "from_node": "add4" + } + }, + "result": true + } + } + } + } + }, + "adddimension2": { + "process_id": "add_dimension", + "arguments": { + "data": { + "from_node": "reducedimension2" + }, + "label": "RVI", + "name": "bands", + "type": "bands" + } + }, + "loadcollection6": { + "process_id": "load_collection", + "arguments": { + "bands": [ + "VV", + "VH" + ], + "id": "SENTINEL1_GRD", + "spatial_extent": { + "from_parameter": "spatial_extent" }, - "y": { - "from_node": "arrayelement4" + "temporal_extent": { + "from_parameter": "temporal_extent" } - } - }, - "arrayelement5": { - "process_id": "array_element", - "arguments": { + } + }, + "sarbackscatter2": { + "process_id": "sar_backscatter", + "arguments": { + "coefficient": "sigma0-ellipsoid", + "contributing_area": false, "data": { - "from_parameter": "data" + "from_node": "loadcollection6" }, - "index": 0 - } - }, - "add4": { - "process_id": "add", - "arguments": { + "elevation_model": null, + "ellipsoid_incidence_angle": false, + "local_incidence_angle": false, + "mask": false, + "noise_removal": true + } + }, + "renamelabels2": { + 
"process_id": "rename_labels", + "arguments": { + "data": { + "from_node": "sarbackscatter2" + }, + "dimension": "bands", + "target": [ + "VV", + "VH" + ] + } + }, + "eq8": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, "x": { - "from_node": "arrayelement5" + "from_parameter": "s1_collection" }, - "y": { - "from_node": "arrayelement4" + "y": "grd" + } + }, + "if8": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "renamelabels2" + }, + "reject": null, + "value": { + "from_node": "eq8" } - } - }, - "divide2": { - "process_id": "divide", - "arguments": { + } + }, + "eq9": { + "process_id": "eq", + "arguments": { + "case_sensitive": false, "x": { - "from_node": "add3" + "from_parameter": "s1_collection" + }, + "y": "rvi" + } + }, + "if9": { + "process_id": "if", + "arguments": { + "accept": { + "from_node": "adddimension2" + }, + "reject": { + "from_node": "if8" }, - "y": { - "from_node": "add4" + "value": { + "from_node": "eq9" } - }, - "result": true } - } - } - } - }, - "adddimension2": { - "process_id": "add_dimension", - "arguments": { - "data": { - "from_node": "reducedimension2" - }, - "label": "RVI", - "name": "bands", - "type": "bands" - } - }, - "loadcollection6": { - "process_id": "load_collection", - "arguments": { - "bands": [ - "VV", - "VH" - ], - "id": "SENTINEL1_GRD", - "spatial_extent": { - "from_parameter": "spatial_extent" - }, - "temporal_extent": { - "from_parameter": "temporal_extent" - } - } - }, - "sarbackscatter2": { - "process_id": "sar_backscatter", - "arguments": { - "coefficient": "sigma0-ellipsoid", - "contributing_area": false, - "data": { - "from_node": "loadcollection6" - }, - "elevation_model": null, - "ellipsoid_incidence_angle": false, - "local_incidence_angle": false, - "mask": false, - "noise_removal": true - } - }, - "renamelabels2": { - "process_id": "rename_labels", - "arguments": { - "data": { - "from_node": "sarbackscatter2" - }, - "dimension": "bands", - "target": [ - "VV", - "VH" - 
] - } - }, - "eq8": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s1_collection" - }, - "y": "grd" - } - }, - "if8": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "renamelabels2" - }, - "reject": null, - "value": { - "from_node": "eq8" - } - } - }, - "eq9": { - "process_id": "eq", - "arguments": { - "case_sensitive": false, - "x": { - "from_parameter": "s1_collection" - }, - "y": "rvi" - } - }, - "if9": { - "process_id": "if", - "arguments": { - "accept": { - "from_node": "adddimension2" }, - "reject": { - "from_node": "if8" + "mergecubes1": { + "process_id": "merge_cubes", + "arguments": { + "cube1": { + "from_node": "if7" + }, + "cube2": { + "from_node": "if9" + } + } }, - "value": { - "from_node": "eq9" - } - } - }, - "mergecubes1": { - "process_id": "merge_cubes", - "arguments": { - "cube1": { - "from_node": "if7" - }, - "cube2": { - "from_node": "if9" + "applyneighborhood1": { + "process_id": "apply_neighborhood", + "arguments": { + "data": { + "from_node": "mergecubes1" + }, + "overlap": [], + "process": { + "process_graph": { + "runudf1": { + "process_id": "run_udf", + "arguments": { + "context": {}, + "data": { + "from_parameter": "data" + }, + "runtime": "Python", + "udf": "#%%\n\nimport os\nimport sys\nimport zipfile\nimport requests\nimport tempfile\nimport shutil\nimport functools\n\nfrom openeo.udf import inspect\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\ndef extract_zip_to_temp(zip_path, temp_dir):\n \"\"\"\n Extracts a zip file into the given temporary directory.\n \"\"\"\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(temp_dir) # Use the existing temp_dir\n return temp_dir\n\ndef move_top_level_folder_to_destination(temp_dir, destination_dir):\n \"\"\"\n Moves each 
top-level folder from the temporary directory to the destination directory.\n Throws an error if the folder already exists at the destination.\n \"\"\"\n # Find the top-level folders inside the extracted zip\n for item in os.listdir(temp_dir):\n item_path = os.path.join(temp_dir, item)\n \n if os.path.isdir(item_path):\n # Check if the folder already exists at destination\n dest_path = os.path.join(destination_dir, item)\n\n if os.path.exists(dest_path):\n # Throw an error if the folder already exists\n raise FileExistsError(f\"Error: The folder '{item}' already exists in the destination directory: {dest_path}\")\n\n # Move the folder out of temp and into the destination directory\n shutil.move(item_path, dest_path)\n\n\ndef add_to_sys_path(folder_path):\n \"\"\"\n Adds the folder path to sys.path.\n \"\"\"\n if folder_path not in sys.path:\n sys.path.append(folder_path)\n\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url):\n \"\"\"\n Main function to download, unzip, move the top-level folder, and add it to sys.path.\n \"\"\"\n with tempfile.TemporaryDirectory() as temp_dir:\n # Step 1: Download the zip file\n zip_path = os.path.join(temp_dir, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n\n inspect(message=\"Extract dependencies to temp\")\n # Step 2: Extract the zip file to the temporary directory\n extracted_dir = extract_zip_to_temp(zip_path, temp_dir) \n\n # Step 3: Move the first top-level folder (dynamically) to the destination\n destination_dir = os.getcwd() # Current working directory\n inspect(message=\"Move top-level folder to destination\")\n moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir)\n\n # Step 4: Add the folder to sys.path\n add_to_sys_path(moved_folder)\n inspect(message=\"Added to the sys path\") \n\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\")\nimport os\nimport sys\nfrom configparser import 
ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" + }, + "result": true + } + } + }, + "size": [ + { + "dimension": "x", + "value": 32, + "unit": "px" + }, + { + "dimension": "y", + "value": 32, + "unit": "px" + } + ] + }, + "result": true } - } }, - "applyneighborhood1": { - "process_id": "apply_neighborhood", - "arguments": { - "data": { - "from_node": "mergecubes1" - }, - "overlap": [], - "process": { 
- "process_graph": { - "runudf1": { - "process_id": "run_udf", - "arguments": { - "context": {}, - "data": { - "from_parameter": "data" - }, - "runtime": "Python", - "udf": "#%%\n\nimport os\nimport sys\nimport zipfile\nimport requests\nimport tempfile\nimport shutil\nimport functools\n\nfrom openeo.udf import inspect\n\ndef download_file(url, path):\n \"\"\"\n Downloads a file from the given URL to the specified path.\n \"\"\"\n response = requests.get(url, stream=True)\n with open(path, \"wb\") as file:\n file.write(response.content)\n\ndef extract_zip_to_temp(zip_path):\n \"\"\"\n Extracts a zip file to a temporary directory.\n \"\"\"\n # Create a temporary directory\n temp_dir = tempfile.mkdtemp()\n\n # Extract the zip file to the temporary directory\n with zipfile.ZipFile(zip_path, \"r\") as zip_ref:\n zip_ref.extractall(temp_dir)\n\n return temp_dir\n\ndef move_top_level_folder_to_destination(temp_dir, destination_dir):\n \"\"\"\n Moves each top-level folder from the temporary directory to the destination directory.\n Throws an error if the folder already exists at the destination.\n \"\"\"\n # Find the top-level folders inside the extracted zip\n for item in os.listdir(temp_dir):\n item_path = os.path.join(temp_dir, item)\n \n if os.path.isdir(item_path):\n # Check if the folder already exists at destination\n dest_path = os.path.join(destination_dir, item)\n\n if os.path.exists(dest_path):\n # Throw an error if the folder already exists\n raise FileExistsError(f\"Error: The folder '{item}' already exists in the destination directory: {dest_path}\")\n\n # Move the folder out of temp and into the destination directory\n shutil.move(item_path, dest_path)\n\n\ndef add_to_sys_path(folder_path):\n \"\"\"\n Adds the folder path to sys.path.\n \"\"\"\n if folder_path not in sys.path:\n sys.path.append(folder_path)\n\n@functools.lru_cache(maxsize=5)\ndef setup_dependencies(dependencies_url):\n \"\"\"\n Main function to download, unzip, move the top-level folder, and 
add it to sys.path.\n \"\"\"\n # Create a temporary directory for extracted files\n temp_dir = tempfile.mkdtemp()\n \n # Step 1: Download the zip file\n zip_path = os.path.join(temp_dir, \"temp.zip\")\n download_file(dependencies_url, zip_path)\n\n inspect(message=\"Extract dependencies to temp\")\n # Step 2: Extract the zip file to the temporary directory\n extracted_dir = extract_zip_to_temp(zip_path)\n\n # Step 3: Move the first top-level folder (dynamically) to the destination\n destination_dir = os.getcwd() # Current working directory\n inspect(message=\"Move top-level folder to destination\")\n moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir)\n\n # Step 4: Add the folder to sys.path\n add_to_sys_path(moved_folder)\n inspect(message=\"Added to the sys path\")\n\n # Clean up the temporary zip file\n os.remove(zip_path)\n shutil.rmtree(temp_dir) # Remove the temporary extraction folder \n\n\nsetup_dependencies(\"https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip\")\nimport os\nimport sys\nfrom configparser import ConfigParser\nfrom pathlib import Path\nfrom typing import Dict\n\nfrom openeo.udf import XarrayDataCube\n\n\ndef load_venv():\n \"\"\"\n Add the virtual environment to the system path if the folder `/tmp/venv_static` exists\n :return:\n \"\"\"\n for venv_path in ['tmp/venv_static', 'tmp/venv']:\n if Path(venv_path).exists():\n sys.path.insert(0, venv_path)\n\n\ndef set_home(home):\n os.environ['HOME'] = home\n\n\ndef create_gpy_cfg():\n home = os.getenv('HOME')\n set_home('/tmp')\n user_file = Path.home() / '.config' / 'GPy' / 'user.cfg'\n if not user_file.exists():\n user_file.parent.mkdir(parents=True, exist_ok=True)\n return user_file, home\n\n\ndef write_gpy_cfg():\n user_file, home = create_gpy_cfg()\n config = ConfigParser()\n config['plotting'] = {\n 'library': 'none'\n }\n with open(user_file, 'w') as cfg:\n config.write(cfg)\n cfg.close()\n return home\n\n\ndef 
apply_datacube(cube: XarrayDataCube, context: Dict) -> XarrayDataCube:\n \"\"\"\n Apply mogpr integration to a datacube.\n MOGPR requires a full timeseries for multiple bands, so it needs to be invoked in the context of an apply_neighborhood process.\n @param cube:\n @param context:\n @return:\n \"\"\"\n load_venv()\n home = write_gpy_cfg()\n\n from fusets.mogpr import mogpr\n dims = cube.get_array().dims\n result = mogpr(cube.get_array().to_dataset(dim=\"bands\"))\n result_dc = XarrayDataCube(result.to_array(dim=\"bands\").transpose(*dims))\n set_home(home)\n return result_dc\n\n\ndef load_mogpr_udf() -> str:\n \"\"\"\n Loads an openEO udf that applies mogpr.\n @return:\n \"\"\"\n import os\n return Path(os.path.realpath(__file__)).read_text()\n" - }, - "result": true - } - } - }, - "size": [ - { - "dimension": "x", - "value": 32, - "unit": "px" - }, - { - "dimension": "y", - "value": 32, - "unit": "px" - } - ] - }, - "result": true - } - }, - "id": "fusets_mogpr", - "summary": "Integrate S1 and S2 timeseries using multi-output gaussian process regression", - "description": "# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR)\n\nThis service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. 
This service focuses on fusing Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources.\n\n## Parameters\n\nThe `fusets_mogpr_s1s2` service requires the following parameters:\n\n\n| Name | Description | Type | Default |\n| --------------- | -------------------------------------------------------------- | ------- | ------- |\n| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON | |\n| temporal_extent | Date range for which to apply the data fusion | Array | |\n| s1_collection | S1 data collection to use for the fusion | Text | RVI |\n| s2_collection | S2 data collection to use for fusing the data | Text | NDVI |\n\n## Supported collections\n\n#### Sentinel-1\n\n* RVI\n* GRD\n\n#### Sentinel-2\n\n* NDVI\n* FAPAR\n* LAI\n* FCOVER\n* EVI\n* CCC\n* CWC\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Dependencies\n\nIn addition to various Python libraries, the workflow utilizes the following libraries included in the User-Defined Function (UDF):\n\n* Biopar: The `biopar` package retrieves biophysical parameters like FAPAR, FCOVER, and more, that were passed as the S2_collection. The biopar package is a Python package that calculates biophysical parameters from Sentinel-2 satellite images as described [here](https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf). The `fusets_mogpr` udp directly uses the biopar udp shared in the APEX Algorithms repository. \n\n* FuseTS: The `fusets` library was developed to facilitate data fusion and time-series analytics using AI/ML to extract insights about land environments. It functions as a Time Series & Data Fusion toolbox integrated with openEO. 
For additional information, please refer to the [FuseTS documentation](https://open-eo.github.io/FuseTS/installation.html).\n\n\n\n## Output\n\nThis User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", - "parameters": [ - { - "name": "spatial_extent", - "description": "Limits the data to process to the specified bounding box or polygons.\\n\\nFor raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\\nFor vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.\\n\\nEmpty geometries are ignored.\\nSet this parameter to null to set no limit for the spatial extent.", - "schema": [ + "id": "fusets_mogpr", + "summary": "Integrate S1 and S2 timeseries using multi-output gaussian process regression", + "description": "# Sentinel-1 and Sentinel-2 data fusion through Multi-output Gaussian process regression (MOGPR)\n\nThis service is designed to enable multi-output regression analysis using Gaussian Process Regression (GPR) on geospatial data. It provides a powerful tool for understanding and predicting spatiotemporal phenomena by filling gaps based on other correlated indicators. 
This service focuses on fusing Sentinel-1 and Sentinel-2 data, allowing the user to select one of the predefined data sources.\n\n## Parameters\n\nThe `fusets_mogpr_s1s2` service requires the following parameters:\n\n\n| Name | Description | Type | Default |\n| --------------- | -------------------------------------------------------------- | ------- | ------- |\n| spatial_extent | Polygon representing the AOI on which to apply the data fusion | GeoJSON | |\n| temporal_extent | Date range for which to apply the data fusion | Array | |\n| s1_collection | S1 data collection to use for the fusion | Text | RVI |\n| s2_collection | S2 data collection to use for fusing the data | Text | NDVI |\n\n## Supported collections\n\n#### Sentinel-1\n\n* RVI\n* GRD\n\n#### Sentinel-2\n\n* NDVI\n* FAPAR\n* LAI\n* FCOVER\n* EVI\n* CCC\n* CWC\n\n## Limitations\n\nThe spatial extent is limited to a maximum size equal to a Sentinel-2 MGRS tile (100 km x 100 km).\n\n## Dependencies\n\nIn addition to various Python libraries, the workflow utilizes the following libraries included in the User-Defined Function (UDF):\n\n* Biopar: The `biopar` package retrieves biophysical parameters like FAPAR, FCOVER, and more, that were passed as the S2_collection. The biopar package is a Python package that calculates biophysical parameters from Sentinel-2 satellite images as described [here](https://step.esa.int/docs/extra/ATBD_S2ToolBox_L2B_V1.1.pdf). The `fusets_mogpr` udp directly uses the biopar udp shared in the APEX Algorithms repository. \n\n* FuseTS: The `fusets` library was developed to facilitate data fusion and time-series analytics using AI/ML to extract insights about land environments. It functions as a Time Series & Data Fusion toolbox integrated with openEO. 
For additional information, please refer to the [FuseTS documentation](https://open-eo.github.io/FuseTS/installation.html).\n\n\n\n## Output\n\nThis User-Defined-Process (UDP) produces a datacube that contains a gap-filled time series for all pixels within the specified temporal and spatial range. This datacube can be seamlessly integrated with other openEO processes.", + "parameters": [ { - "title": "Bounding Box", - "type": "object", - "subtype": "bounding-box", - "required": [ - "west", - "south", - "east", - "north" - ], - "properties": { - "west": { - "description": "West (lower left corner, coordinate axis 1).", - "type": "number" - }, - "south": { - "description": "South (lower left corner, coordinate axis 2).", - "type": "number" - }, - "east": { - "description": "East (upper right corner, coordinate axis 1).", - "type": "number" - }, - "north": { - "description": "North (upper right corner, coordinate axis 2).", - "type": "number" - }, - "base": { - "description": "Base (optional, lower left corner, coordinate axis 3).", - "type": [ - "number", - "null" - ], - "default": null - }, - "height": { - "description": "Height (optional, upper right corner, coordinate axis 3).", - "type": [ - "number", - "null" - ], - "default": null - }, - "crs": { - "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). 
Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", - "anyOf": [ + "name": "spatial_extent", + "description": "Limits the data to process to the specified bounding box or polygons.\\n\\nFor raster data, the process loads the pixel into the data cube if the point at the pixel center intersects with the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC).\\nFor vector data, the process loads the geometry into the data cube if the geometry is fully within the bounding box or any of the polygons (as defined in the Simple Features standard by the OGC). Empty geometries may only be in the data cube if no spatial extent has been provided.\\n\\nEmpty geometries are ignored.\\nSet this parameter to null to set no limit for the spatial extent.", + "schema": [ { - "title": "EPSG Code", - "type": "integer", - "subtype": "epsg-code", - "minimum": 1000, - "examples": [ - 3857 - ] + "title": "Bounding Box", + "type": "object", + "subtype": "bounding-box", + "required": [ + "west", + "south", + "east", + "north" + ], + "properties": { + "west": { + "description": "West (lower left corner, coordinate axis 1).", + "type": "number" + }, + "south": { + "description": "South (lower left corner, coordinate axis 2).", + "type": "number" + }, + "east": { + "description": "East (upper right corner, coordinate axis 1).", + "type": "number" + }, + "north": { + "description": "North (upper right corner, coordinate axis 2).", + "type": "number" + }, + "base": { + "description": "Base (optional, lower left corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + }, + "height": { + "description": "Height (optional, upper right corner, coordinate axis 3).", + "type": [ + "number", + "null" + ], + "default": null + }, + "crs": { + "description": "Coordinate reference system of the extent, specified as as [EPSG code](http://www.epsg-registry.org/) or [WKT2 CRS 
string](http://docs.opengeospatial.org/is/18-010r7/18-010r7.html). Defaults to `4326` (EPSG code 4326) unless the client explicitly requests a different coordinate reference system.", + "anyOf": [ + { + "title": "EPSG Code", + "type": "integer", + "subtype": "epsg-code", + "minimum": 1000, + "examples": [ + 3857 + ] + }, + { + "title": "WKT2", + "type": "string", + "subtype": "wkt2-definition" + } + ], + "default": 4326 + } + } }, { - "title": "WKT2", - "type": "string", - "subtype": "wkt2-definition" + "title": "Vector data cube", + "description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). Empty geometries are ignored.", + "type": "object", + "subtype": "datacube", + "dimensions": [ + { + "type": "geometry" + } + ] + }, + { + "title": "No filter", + "description": "Don't filter spatially. All data is included in the data cube.", + "type": "null" } - ], - "default": 4326 - } - } + ] }, { - "title": "Vector data cube", - "description": "Limits the data cube to the bounding box of the given geometries in the vector data cube. For raster data, all pixels inside the bounding box that do not intersect with any of the polygons will be set to no data (`null`). Empty geometries are ignored.", - "type": "object", - "subtype": "datacube", - "dimensions": [ - { - "type": "geometry" + "name": "temporal_extent", + "description": "Temporal extent specified as two-element array with start and end date/date-time. 
\nThis is date range for which to apply the data fusion", + "schema": { + "type": "array", + "subtype": "temporal-interval", + "uniqueItems": true, + "minItems": 2, + "maxItems": 2, + "items": { + "anyOf": [ + { + "type": "string", + "subtype": "date-time", + "format": "date-time" + }, + { + "type": "string", + "subtype": "date", + "format": "date" + }, + { + "type": "null" + } + ] + } } - ] }, { - "title": "No filter", - "description": "Don't filter spatially. All data is included in the data cube.", - "type": "null" - } - ] - }, - { - "name": "temporal_extent", - "description": "Temporal extent specified as two-element array with start and end date/date-time. \nThis is date range for which to apply the data fusion", - "schema": { - "type": "array", - "subtype": "temporal-interval", - "uniqueItems": true, - "minItems": 2, - "maxItems": 2, - "items": { - "anyOf": [ - { - "type": "string", - "subtype": "date-time", - "format": "date-time" + "name": "s1_collection", + "description": "S1 data collection to use for fusing the data", + "schema": { + "type": "string", + "enum": [ + "RVI", + "GRD" + ] }, - { - "type": "string", - "subtype": "date", - "format": "date" + "default": "RVI", + "optional": true + }, + { + "name": "s2_collection", + "description": "S2 data collection to use for fusing the data", + "schema": { + "type": "string", + "enum": [ + "NDVI", + "FAPAR", + "LAI", + "FCOVER", + "EVI", + "CCC", + "CWC" + ] }, - { - "type": "null" - } - ] + "default": "NDVI", + "optional": true } - } - }, - { - "name": "s1_collection", - "description": "S1 data collection to use for fusing the data", - "schema": { - "type": "string", - "enum": [ - "RVI", - "GRD" - ] - }, - "default": "RVI", - "optional": true - }, - { - "name": "s2_collection", - "description": "S2 data collection to use for fusing the data", - "schema": { - "type": "string", - "enum": [ - "NDVI", - "FAPAR", - "LAI", - "FCOVER", - "EVI", - "CCC", - "CWC" - ] - }, - "default": "NDVI", - "optional": true - } - 
] + ] } \ No newline at end of file diff --git a/openeo_udp/fusets_mogpr/set_path.py b/openeo_udp/fusets_mogpr/set_path.py index b16ae218..7b06dc3a 100644 --- a/openeo_udp/fusets_mogpr/set_path.py +++ b/openeo_udp/fusets_mogpr/set_path.py @@ -18,17 +18,12 @@ def download_file(url, path): with open(path, "wb") as file: file.write(response.content) -def extract_zip_to_temp(zip_path): +def extract_zip_to_temp(zip_path, temp_dir): """ - Extracts a zip file to a temporary directory. + Extracts a zip file into the given temporary directory. """ - # Create a temporary directory - temp_dir = tempfile.mkdtemp() - - # Extract the zip file to the temporary directory with zipfile.ZipFile(zip_path, "r") as zip_ref: - zip_ref.extractall(temp_dir) - + zip_ref.extractall(temp_dir) # Use the existing temp_dir return temp_dir def move_top_level_folder_to_destination(temp_dir, destination_dir): @@ -59,34 +54,29 @@ def add_to_sys_path(folder_path): if folder_path not in sys.path: sys.path.append(folder_path) + @functools.lru_cache(maxsize=5) def setup_dependencies(dependencies_url): """ Main function to download, unzip, move the top-level folder, and add it to sys.path. 
""" - # Create a temporary directory for extracted files - temp_dir = tempfile.mkdtemp() - - # Step 1: Download the zip file - zip_path = os.path.join(temp_dir, "temp.zip") - download_file(dependencies_url, zip_path) - - inspect(message="Extract dependencies to temp") - # Step 2: Extract the zip file to the temporary directory - extracted_dir = extract_zip_to_temp(zip_path) - - # Step 3: Move the first top-level folder (dynamically) to the destination - destination_dir = os.getcwd() # Current working directory - inspect(message="Move top-level folder to destination") - moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir) - - # Step 4: Add the folder to sys.path - add_to_sys_path(moved_folder) - inspect(message="Added to the sys path") - - # Clean up the temporary zip file - os.remove(zip_path) - shutil.rmtree(temp_dir) # Remove the temporary extraction folder + with tempfile.TemporaryDirectory() as temp_dir: + # Step 1: Download the zip file + zip_path = os.path.join(temp_dir, "temp.zip") + download_file(dependencies_url, zip_path) + + inspect(message="Extract dependencies to temp") + # Step 2: Extract the zip file to the temporary directory + extracted_dir = extract_zip_to_temp(zip_path, temp_dir) + + # Step 3: Move the first top-level folder (dynamically) to the destination + destination_dir = os.getcwd() # Current working directory + inspect(message="Move top-level folder to destination") + moved_folder = move_top_level_folder_to_destination(extracted_dir, destination_dir) + + # Step 4: Add the folder to sys.path + add_to_sys_path(moved_folder) + inspect(message="Added to the sys path") setup_dependencies("https://artifactory.vgt.vito.be:443/artifactory/auxdata-public/ai4food/fusets_venv.zip") \ No newline at end of file From 0e6204c4ac4777f376143159de97e206b9992be7 Mon Sep 17 00:00:00 2001 From: Pratichhya <39898768+Pratichhya@users.noreply.github.com> Date: Thu, 6 Feb 2025 15:18:43 +0100 Subject: [PATCH 26/26] removed save result 
--- benchmark_scenarios/fusets_mogpr.json | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/benchmark_scenarios/fusets_mogpr.json b/benchmark_scenarios/fusets_mogpr.json index cb415483..1abf4dcf 100644 --- a/benchmark_scenarios/fusets_mogpr.json +++ b/benchmark_scenarios/fusets_mogpr.json @@ -43,17 +43,6 @@ }, "namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/09413be3c27e0e695d426c9ffe5a0fe90beefe65/openeo_udp/fusets_mogpr/fusets_mogpr.json", "process_id": "fusets_mogpr" - }, - "saveresult1": { - "arguments": { - "data": { - "from_node": "fusetsmogpr" - }, - "format": "netCDF", - "options": {} - }, - "process_id": "save_result", - "result": true } }, "reference_data": {