Random forest #215

Open · wants to merge 9 commits into base: main

@@ -0,0 +1,94 @@
[
{
"id": "forest_fire_mapping",
"type": "openeo",
"description": "Forest Fire Mapping using Random Forest based on Sentinel-2 and Sentinel-1 data",
"backend": "openeo.vito.be",
"process_graph": {
"randomforestfiremapping1": {
"arguments": {
"spatial_extent": {
"coordinates": [
[
[
-17.996638457335074,
28.771993378019005
],
[
-17.960989271845406,
28.822652746872745
],
[
-17.913144312372435,
28.85454938652139
],
[
-17.842315009623224,
28.83015783855478
],
[
-17.781805207936817,
28.842353612538087
],
[
-17.728331429702315,
28.74103487483061
],
[
-17.766795024572748,
28.681932277834584
],
[
-17.75131577297855,
28.624236885528937
],
[
-17.756944591740076,
28.579206335436727
],
[
-17.838093395552082,
28.451150708612
],
[
-17.871397239891113,
28.480702007110015
],
[
-17.88969090086607,
28.57404658490533
],
[
-17.957705794234517,
28.658947934558352
],
[
-18.003674480786984,
28.76167387695621
],
[
-18.003674480786984,
28.76167387695621
],
[
-17.996638457335074,
28.771993378019005
]
]
],
"type": "Polygon"
},
"temporal_extent": [
"2023-07-15",
"2023-09-15"
]
},
"namespace": "https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/8ace24cf9853989b964e65246249e1379833be9d/algorithm_catalog/vito/random_forest_firemapping/openeo_udp/random_forest_firemapping.json",
"process_id": "random_forest_firemapping"
}
},
"reference_data": {
"output.tif": "https://s3.waw3-1.cloudferro.com/swift/v1/apex-examples/random_forest/random_forest_firemapping.tif"
}
}
]
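
For context, a minimal sketch of how this benchmark scenario could be reproduced with the openEO Python client. The back-end, process id, namespace URL, temporal extent and output name are taken from the scenario above; the bounding box is only an approximation of the listed polygon, and an authenticated account on openeo.vito.be is assumed.

import openeo

# Connect to the back-end named in the scenario
connection = openeo.connect("openeo.vito.be").authenticate_oidc()

# Invoke the published UDP through its public namespace URL, as in the process graph above
cube = connection.datacube_from_process(
    "random_forest_firemapping",
    namespace="https://raw.githubusercontent.com/ESA-APEx/apex_algorithms/8ace24cf9853989b964e65246249e1379833be9d/algorithm_catalog/vito/random_forest_firemapping/openeo_udp/random_forest_firemapping.json",
    # Approximate bounding box of the polygon defined in the scenario
    spatial_extent={"west": -18.01, "south": 28.45, "east": -17.72, "north": 28.86},
    temporal_extent=["2023-07-15", "2023-09-15"],
)

# Run as a batch job and download the result for comparison with the reference GeoTIFF
cube.execute_batch("output.tif")
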
@@ -0,0 +1,119 @@
import openeo
from openeo import Connection
from openeo.extra.spectral_indices import compute_indices
from openeo.processes import array_create



def s1_features(connection: Connection, date, aoi, reducer):

"""
Preprocess Sentinel-1 SAR data by applying backscatter correction,
computing VH/VV ratio and log transformations, then reducing over time.

Args:
connection (Connection): An openEO connection.
date (Tuple[str, str]): Temporal extent as (start_date, end_date)
aoi (dict): Spatial extent
reducer (Any): Reducer (first, last, mean, median)

Returns:
DataCube: The processed and temporally reduced Sentinel-1 data cube.
"""
    # load the Sentinel-1 GRD collection
s1_cube = connection.load_collection(
"SENTINEL1_GRD",
temporal_extent=date,
spatial_extent=aoi,
bands=["VH", "VV"]
)

# apply SAR backscatter processing to the collection
s1_cube = s1_cube.sar_backscatter(coefficient="sigma0-ellipsoid")

# apply band-wise processing to create a ratio and log-transformed bands
s1_cube = s1_cube.apply_dimension(
dimension="bands",
process=lambda x: array_create(
[
30.0 * x[0] / x[1], # Ratio of VH to VV
30.0 + 10.0 * x[0].log(base=10), # Log-transformed VH
30.0 + 10.0 * x[1].log(base=10), # Log-transformed VV
]
),
)

s1_cube = s1_cube.rename_labels("bands", ["ratio"] + s1_cube.metadata.band_names)

# scale to int16
s1_cube = s1_cube.linear_scale_range(0, 30, 0, 30000)

return s1_cube.reduce_dimension(reducer=reducer, dimension="t")

def s2_features(connection: Connection, date, aoi, reducer):
"""
Preprocess Sentinel-2 data by loading relevant bands, applying scaling,
and reducing over time using a specified reducer.

Args:
connection (Connection): An openEO connection.
date: Temporal extent as (start_date, end_date)
aoi: Spatial extent
reducer (Any): Reducer (first, last, mean, median)

Returns:
DataCube: The processed and temporally reduced Sentinel-2 datacube.
"""
    # load the Sentinel-2 L2A collection
s2_cube = connection.load_collection(
"SENTINEL2_L2A",
temporal_extent=date,
spatial_extent=aoi,
bands=["B02", "B03", "B04", "B08","B12"],
max_cloud_cover=80,
)

scl = connection.load_collection(
"SENTINEL2_L2A",
temporal_extent=date,
spatial_extent=aoi,
bands=["SCL"],
max_cloud_cover=80,
)

    # derive a dilated cloud / cloud-shadow mask from the SCL band
    mask = scl.process(
"to_scl_dilation_mask",
data=scl,
kernel1_size=17,
kernel2_size=77,
mask1_values=[2, 4, 5, 6, 7],
mask2_values=[3, 8, 9, 10, 11],
erosion_kernel_size=3
)

# Create a cloud-free mosaic
masked_cube = s2_cube.mask(mask)
cf_cube = masked_cube.reduce_dimension(reducer=reducer, dimension="t")

# calculate all indices
indices_list = ["NBR", "BAI"]
indices = compute_indices(cf_cube, indices_list)

# calculate texture features
features_udf = openeo.UDF.from_file("features.py")
Review comment (Contributor): can you give features.py a more descriptive name?

features = cf_cube.apply_neighborhood(
process=features_udf,
size=[
{"dimension": "x", "value": 128, "unit": "px"},
{"dimension": "y", "value": 128, "unit": "px"},
],
overlap=[
{"dimension": "x", "value": 32, "unit": "px"},
{"dimension": "y", "value": 32, "unit": "px"},
],
)

    # combine the original bands with the computed indices and texture features
merged_cube = cf_cube.merge_cubes(indices)
merged_cube = merged_cube.merge_cubes(features)
return merged_cube
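
A hedged usage sketch for the two helpers above, assuming this module is saved as eo_extractor.py (the name imported by the generator script further down) and that features.py sits in the working directory, since s2_features loads it via openeo.UDF.from_file. The area of interest and period are illustrative placeholders mirroring the benchmark scenario.

import openeo
from eo_extractor import s1_features, s2_features

connection = openeo.connect("openeo.vito.be").authenticate_oidc()

# Illustrative area of interest and time window
aoi = {"west": -18.01, "south": 28.45, "east": -17.72, "north": 28.86}
period = ["2023-07-15", "2023-09-15"]

# Build the Sentinel-1 and Sentinel-2 feature cubes and merge them into one band stack
s1_cube = s1_features(connection, period, aoi, "median")
s2_cube = s2_features(connection, period, aoi, "median")
feature_cube = s2_cube.merge_cubes(s1_cube)
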
@@ -0,0 +1,83 @@
import xarray
import numpy as np
from skimage.feature import graycomatrix, graycoprops
from openeo.metadata import CollectionMetadata


def apply_metadata(metadata: CollectionMetadata, context: dict) -> CollectionMetadata:
return metadata.rename_labels(
dimension = "bands",
target = ["contrast","variance","NDFI"]
)


def apply_datacube(cube: xarray.DataArray, context: dict) -> xarray.DataArray:
"""
Applies spatial texture analysis and spectral index computation to a Sentinel-2 data cube.

Computes:
- NDFI (Normalized Difference Fraction Index) from bands B08 and B12
- Texture features (contrast and variance) using Gray-Level Co-occurrence Matrix (GLCM)

Args:
cube (xarray.DataArray): A 3D data cube with dimensions (bands, y, x) containing at least bands B08 and B12.
context (dict): A context dictionary (currently unused, included for API compatibility).

Returns:
xarray.DataArray: A new data cube with dimensions (bands, y, x) containing:
- 'contrast': GLCM contrast
- 'variance': GLCM variance
- 'NDFI': Normalised Difference Fire Index
"""

# Parameters
window_size = 33
Review comment (Contributor): are these indeed fixed for this network, or should they be tunable parameters?

pad = window_size // 2
levels = 256 # For 8-bit images

# Load Data
# data = cube.values # shape: (t, bands, y, x)

    # first, compute NDFI from bands B08 and B12
b08 = cube.sel(bands="B08")
b12 = cube.sel(bands="B12")

# Compute mean values
avg_b08 = b08.mean()
avg_b12 = b12.mean()

# Calculate NDFI
ndfi = ((b12 / avg_b12) - (b08 / avg_b08)) / (b08 / avg_b08)
Review comment (Contributor): is this one unavailable in the indices package?


# Padding the image to handle border pixels for GLCM
padded = np.pad(b12, pad_width=pad, mode='reflect')

# Normalize to 0–255 range
img_norm = (padded - padded.min()) / (padded.max() - padded.min())
padded = (img_norm * 255).astype(np.uint8)

# Initialize feature maps
shape = b12.shape
contrast = np.zeros(shape)
variance = np.zeros(shape)

    # slide a window over every pixel of the padded band and compute GLCM texture statistics
    for i in range(pad, pad + shape[0]):
for j in range(pad, pad + shape[1]):
window = padded[i - pad:i + pad + 1, j - pad:j + pad + 1]

# Compute GLCM
glcm = graycomatrix(window, distances=[5], angles=[0], levels=levels, symmetric=True, normed=True)

# Texture features
contrast[i - pad, j - pad] = graycoprops(glcm, 'contrast')[0, 0]
variance[i - pad, j - pad] = np.var(window)

all_texture = np.stack([contrast,variance,ndfi])
# create a data cube with all the calculated properties
textures = xarray.DataArray(
data=all_texture,
dims=["bands", "y", "x"],
coords={"bands": ["contrast","variance","NDFI"], "y": cube.coords["y"], "x": cube.coords["x"]},
)

return textures
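
A small local test sketch for this UDF, assuming it is saved as features.py. It builds a synthetic (bands, y, x) chunk and calls apply_datacube directly, which is a quick way to sanity-check the GLCM loop and the NDFI computation outside of openEO; the nested window loop is slow, so the chunk is kept small.

import numpy as np
import xarray
from features import apply_datacube  # assumes the UDF file above is saved as features.py

# Synthetic two-band chunk with reflectance-like values for B08 and B12
rng = np.random.default_rng(42)
data = rng.uniform(100, 3000, size=(2, 32, 32)).astype("float32")
chunk = xarray.DataArray(
    data,
    dims=["bands", "y", "x"],
    coords={"bands": ["B08", "B12"], "y": np.arange(32), "x": np.arange(32)},
)

result = apply_datacube(chunk, context={})
print(result.dims, result.coords["bands"].values)  # ('bands', 'y', 'x') ['contrast' 'variance' 'NDFI']
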
@@ -0,0 +1,53 @@
import json
from pathlib import Path
import openeo
from openeo.api.process import Parameter
from openeo.rest.udp import build_process_dict

from eo_extractor import s1_features, s2_features


def generate() -> dict:

connection = openeo.connect("openeo.vito.be").authenticate_oidc()

# define input parameter
spatial_extent = Parameter.spatial_extent(
name = "spatial_extent",
description = "Limits the data to process to the specified bounding box or polygons."
)

temporal_extent = Parameter.temporal_interval(
name = "temporal_extent",
description = "Temporal extent specified as two-element array with start and end date/date-time."
)

# Load s1 and s2 features
    s1_feature_cube = s1_features(connection, temporal_extent, spatial_extent, "median")
    s2_feature_cube = s2_features(connection, temporal_extent, spatial_extent, "median")
# Merge the two feature cubes
inference_cube = s2_feature_cube.merge_cubes(s1_feature_cube)

# link to the trained model: this model has an expiry date
Review comment (Contributor): a standard model on S3 should be used; can check with the core team for the preferred location.

model = "https://openeo.vito.be/openeo/1.2/jobs/j-25071606273445d397068beb485ed457/results/items/ZWNjZTlmZWEwNGI4YzljNzZhYzc2YjQ1YjZiYTAwYzIwZjIxMWJkYTQ4NTZjMTRhYTQ0NzViOGU4ZWQ0MzNjZEBlZ2kuZXU=/3d1f92a8205422f7a7cc9a83f908a2ac/ml_model_metadata.json?expires=1753253368"

    # predict using the pre-trained random forest model
inference = inference_cube.predict_random_forest(
model=model,
dimension="bands"
)

# Build the process dictionary
return build_process_dict(
process_graph=inference,
process_id="random_forest_firemapping",
summary="Forest Fire Mapping Using Random Forest in openEO",
description="Forest fire mapping is a critical tool for environmental monitoring and disaster management, enabling the timely detection and assessment of burned areas. This service is build upon techniques described in the research paper by Zhou, Bao et al., which introduces a machine learning–based approach using Sentinel-2 imagery. Their method combines spectral, topographic, and textural features to improve classification accuracy, particularly emphasising GLCM texture features extracted from Sentinel-2's short-wave infrared band.Thus, the UDP performs forest fire mapping using a pre-trained Random Forest model in openEO. It combines Sentinel-1 and Sentinel-2 features, applies the model, and outputs the predicted fire mapping results.",
parameters=[spatial_extent, temporal_extent]
)


if __name__ == "__main__":
# save the generated process to a file
with open(Path(__file__).parent / "random_forest_firemapping.json", "w") as f:
json.dump(generate(), f, indent=2)
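
As an alternative to referencing the generated JSON through a public namespace URL (as the benchmark scenario does), the definition written by this script could also be registered as a user-defined process on the back-end. A sketch, assuming an authenticated connection and that the JSON file is in the working directory:

import json
import openeo

connection = openeo.connect("openeo.vito.be").authenticate_oidc()

# Load the process definition written above and register it as a UDP
with open("random_forest_firemapping.json") as f:
    udp = json.load(f)

connection.save_user_defined_process(
    user_defined_process_id=udp["id"],
    process_graph=udp["process_graph"],
    parameters=udp.get("parameters"),
    summary=udp.get("summary"),
    description=udp.get("description"),
)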